diff options
1340 files changed, 27960 insertions, 24192 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dfb1ac7b6..bb64db91bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -401,6 +401,7 @@ add_subdirectory(utils/count) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/yaml-bench) +add_subdirectory(utils/obj2yaml) add_subdirectory(projects) @@ -425,6 +426,20 @@ if( LLVM_INCLUDE_TESTS ) # Windows. add_subdirectory(utils/KillTheDoctor) endif() + + # Add a global check rule now that all subdirectories have been traversed + # and we know the total set of lit testsuites. + get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES) + get_property(LLVM_LIT_PARAMS GLOBAL PROPERTY LLVM_LIT_PARAMS) + get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS) + get_property(LLVM_LIT_EXTRA_ARGS GLOBAL PROPERTY LLVM_LIT_EXTRA_ARGS) + add_lit_target(check-all + "Running all regression tests" + ${LLVM_LIT_TESTSUITES} + PARAMS ${LLVM_LIT_PARAMS} + DEPENDS ${LLVM_LIT_DEPENDS} + ARGS ${LLVM_LIT_EXTRA_ARGS} + ) endif() add_subdirectory(cmake/modules) diff --git a/CREDITS.TXT b/CREDITS.TXT index 6b10a0de07..f090ad734c 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -22,6 +22,10 @@ D: GVNPRE pass, TargetData refactoring, random improvements N: Henrik Bach D: MingW Win32 API portability layer +N: Aaron Ballman +E: aaron@aaronballman.com +D: __declspec attributes, Windows support, general bug fixing + N: Nate Begeman E: natebegeman@mac.com D: PowerPC backend developer diff --git a/Makefile.config.in b/Makefile.config.in index e90731b3a0..cc538b3f42 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -191,8 +191,6 @@ GAS := @GAS@ POD2HTML := @POD2HTML@ POD2MAN := @POD2MAN@ PDFROFF := @PDFROFF@ -RUNTEST := @RUNTEST@ -TCLSH := @TCLSH@ ZIP := @ZIP@ HAVE_PTHREAD := @HAVE_PTHREAD@ diff --git a/Makefile.rules b/Makefile.rules index f44d2c5884..7ef13bb786 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1983,20 +1983,9 @@ check:: $(EchoCmd) No test directory ; \ fi +# An alias 
dating from when both lit and DejaGNU test runners were used. check-lit:: check -check-dg:: - $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \ - if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \ - $(EchoCmd) Running test suite ; \ - $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-dg ; \ - else \ - $(EchoCmd) No Makefile in test directory ; \ - fi ; \ - else \ - $(EchoCmd) No test directory ; \ - fi - check-all:: $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \ diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 5e1b194e25..676154b3e5 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -1054,8 +1054,6 @@ AC_PATH_PROG(DOXYGEN, [doxygen]) AC_PATH_PROG(GROFF, [groff]) AC_PATH_PROG(GZIPBIN, [gzip]) AC_PATH_PROG(PDFROFF, [pdfroff]) -AC_PATH_PROG(RUNTEST, [runtest]) -DJ_AC_PATH_TCLSH AC_PATH_PROG(ZIP, [zip]) AC_PATH_PROGS(OCAMLC, [ocamlc]) AC_PATH_PROGS(OCAMLOPT, [ocamlopt]) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index bfe5127a4b..b5b74f0f16 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1,3 +1,4 @@ +include(LLVMParseArguments) include(LLVMProcessSources) include(LLVM-Config) @@ -82,7 +83,6 @@ macro(add_llvm_executable name) add_executable(${name} ${ALL_FILES}) endif() set(EXCLUDE_FROM_ALL OFF) - target_link_libraries( ${name} ${LLVM_USED_LIBS} ) llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) @@ -148,3 +148,152 @@ macro(add_llvm_external_project name) endif() endif() endmacro(add_llvm_external_project) + +# Generic support for adding a unittest. 
+function(add_unittest test_suite test_name) + if (CMAKE_BUILD_TYPE) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}) + else() + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() + if( NOT LLVM_BUILD_TESTS ) + set(EXCLUDE_FROM_ALL ON) + endif() + + add_llvm_executable(${test_name} ${ARGN}) + target_link_libraries(${test_name} + gtest + gtest_main + LLVMSupport # gtest needs it for raw_ostream. + ) + + add_dependencies(${test_suite} ${test_name}) + get_target_property(test_suite_folder ${test_suite} FOLDER) + if (NOT ${test_suite_folder} STREQUAL "NOTFOUND") + set_property(TARGET ${test_name} PROPERTY FOLDER "${test_suite_folder}") + endif () + + # Visual Studio 2012 only supports up to 8 template parameters in + # std::tr1::tuple by default, but gtest requires 10 + if (MSVC AND MSVC_VERSION EQUAL 1700) + set_property(TARGET ${test_name} APPEND PROPERTY COMPILE_DEFINITIONS _VARIADIC_MAX=10) + endif () + + include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) + set_property(TARGET ${test_name} APPEND PROPERTY COMPILE_DEFINITIONS GTEST_HAS_RTTI=0) + if (NOT LLVM_ENABLE_THREADS) + set_property(TARGET ${test_name} APPEND PROPERTY COMPILE_DEFINITIONS GTEST_HAS_PTHREAD=0) + endif () + + get_property(target_compile_flags TARGET ${test_name} PROPERTY COMPILE_FLAGS) + if (LLVM_COMPILER_IS_GCC_COMPATIBLE) + set(target_compile_flags "${target_compile_flags} -fno-rtti") + elseif (MSVC) + set(target_compile_flags "${target_compile_flags} /GR-") + endif () + + if (SUPPORTS_NO_VARIADIC_MACROS_FLAG) + set(target_compile_flags "${target_compile_flags} -Wno-variadic-macros") + endif () + set_property(TARGET ${test_name} PROPERTY COMPILE_FLAGS "${target_compile_flags}") +endfunction() + +# This function provides an automatic way to 'configure'-like generate a file +# based on a set of common and custom variables, specifically targetting the +# variables needed for the 'lit.site.cfg' files. 
This function bundles the +# common variables that any Lit instance is likely to need, and custom +# variables can be passed in. +function(configure_lit_site_cfg input output) + foreach(c ${LLVM_TARGETS_TO_BUILD}) + set(TARGETS_BUILT "${TARGETS_BUILT} ${c}") + endforeach(c) + set(TARGETS_TO_BUILD ${TARGETS_BUILT}) + + set(SHLIBEXT "${LTDL_SHLIB_EXT}") + set(SHLIBDIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}") + + if(BUILD_SHARED_LIBS) + set(LLVM_SHARED_LIBS_ENABLED "1") + else() + set(LLVM_SHARED_LIBS_ENABLED "0") + endif(BUILD_SHARED_LIBS) + + if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + set(SHLIBPATH_VAR "DYLD_LIBRARY_PATH") + else() # Default for all other unix like systems. + # CMake hardcodes the library locaction using rpath. + # Therefore LD_LIBRARY_PATH is not required to run binaries in the + # build dir. We pass it anyways. + set(SHLIBPATH_VAR "LD_LIBRARY_PATH") + endif() + + # Configuration-time: See Unit/lit.site.cfg.in + set(LLVM_BUILD_MODE "%(build_mode)s") + + set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) + set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) + set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") + set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/%(build_config)s") + set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) + set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED}) + set(SHLIBPATH_VAR ${SHLIBPATH_VAR}) + + if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE) + set(ENABLE_ASSERTIONS "1") + else() + set(ENABLE_ASSERTIONS "0") + endif() + + set(HOST_OS ${CMAKE_HOST_SYSTEM_NAME}) + set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) + + configure_file(${input} ${output} @ONLY) +endfunction() + +# A raw function to create a lit target. This is used to implement the testuite +# management functions. 
+function(add_lit_target target comment) + parse_arguments(ARG "PARAMS;DEPENDS;ARGS" "" ${ARGN}) + set(LIT_ARGS "${ARG_ARGS} ${LLVM_LIT_ARGS}") + separate_arguments(LIT_ARGS) + set(LIT_COMMAND + ${PYTHON_EXECUTABLE} + ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py + --param build_config=${CMAKE_CFG_INTDIR} + --param build_mode=${RUNTIME_BUILD_MODE} + ${LIT_ARGS} + ) + foreach(param ${ARG_PARAMS}) + list(APPEND LIT_COMMAND --param ${param}) + endforeach() + add_custom_target(${target} + COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS} + COMMENT "${comment}" + ) + + # It would be nice to use the DEPENDS clause in add_custom_target above, but + # that has bugs with the CMake 2.8.0 installed on Ubuntu Lucid when the entry + # in the depends is another custom target. Instead we add them through an + # explicit add_dependencies. + add_dependencies(${target} ${ARG_DEPENDS}) +endfunction() + +# A function to add a set of lit test suites to be driven through 'check-*' targets. +function(add_lit_testsuite target comment) + parse_arguments(ARG "PARAMS;DEPENDS;ARGS" "" ${ARGN}) + + # Register the testsuites, params and depends for the global check rule. + set_property(GLOBAL APPEND PROPERTY LLVM_LIT_TESTSUITES ${ARG_DEFAULT_ARGS}) + set_property(GLOBAL APPEND PROPERTY LLVM_LIT_PARAMS ${ARG_PARAMS}) + set_property(GLOBAL APPEND PROPERTY LLVM_LIT_DEPENDS ${ARG_DEPENDS}) + set_property(GLOBAL APPEND PROPERTY LLVM_LIT_EXTRA_ARGS ${ARG_ARGS}) + + # Produce a specific suffixed check rule. 
+ add_lit_target(${target} ${comment} + ${ARG_DEFAULT_ARGS} + PARAMS ${ARG_PARAMS} + DEPENDS ${ARG_DEPENDS} + ARGS ${ARG_ARGS} + ) +endfunction() diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake index 641f1b33e1..0e410edc15 100644 --- a/cmake/modules/LLVMProcessSources.cmake +++ b/cmake/modules/LLVMProcessSources.cmake @@ -29,7 +29,7 @@ endmacro(add_td_sources) macro(add_header_files srcs) - file(GLOB hds *.h *.def) + file(GLOB hds *.h) if( hds ) set_source_files_properties(${hds} PROPERTIES HEADER_FILE_ONLY ON) list(APPEND ${srcs} ${hds}) @@ -50,6 +50,7 @@ function(llvm_process_sources OUT_VAR) endforeach(s) if( MSVC_IDE ) # This adds .td and .h files to the Visual Studio solution: + # FIXME: Shall we handle *.def here? add_td_sources(sources) add_header_files(sources) endif() @@ -81,10 +82,13 @@ function(llvm_check_source_file_list) file(GLOB globbed *.cpp) foreach(g ${globbed}) get_filename_component(fn ${g} NAME) - list(FIND listed ${fn} idx) + list(FIND LLVM_OPTIONAL_SOURCES ${fn} idx) if( idx LESS 0 ) - message(SEND_ERROR "Found unknown source file ${g} + list(FIND listed ${fn} idx) + if( idx LESS 0 ) + message(SEND_ERROR "Found unknown source file ${g} Please update ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt\n") + endif() endif() endforeach() endfunction(llvm_check_source_file_list) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 1b1b1728d7..e3bdd9c704 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -50,7 +50,9 @@ function(add_public_tablegen_target target) if( TABLEGEN_OUTPUT ) add_custom_target(${target} DEPENDS ${TABLEGEN_OUTPUT}) - add_dependencies(${target} ${LLVM_COMMON_DEPENDS}) + if (LLVM_COMMON_DEPENDS) + add_dependencies(${target} ${LLVM_COMMON_DEPENDS}) + endif () set_target_properties(${target} PROPERTIES FOLDER "Tablegenning") endif( TABLEGEN_OUTPUT ) endfunction() @@ -745,8 +745,6 @@ DOXYGEN GROFF GZIPBIN PDFROFF -RUNTEST -TCLSH ZIP OCAMLC 
OCAMLOPT @@ -1447,7 +1445,6 @@ Optional Packages: --with-bug-report-url Specify the URL where bug reports should be submitted (default=http://llvm.org/bugs/) --with-internal-prefix Installation directory for internal files - --with-tclinclude directory where tcl headers are --with-udis86=<path> Use udis86 external x86 disassembler library --with-oprofile=<prefix> Tell OProfile >= 0.9.4 how to symbolize JIT output @@ -7261,136 +7258,6 @@ echo "${ECHO_T}no" >&6; } fi -# Extract the first word of "runtest", so it can be a program name with args. -set dummy runtest; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_path_RUNTEST+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - case $RUNTEST in - [\\/]* | ?:[\\/]*) - ac_cv_path_RUNTEST="$RUNTEST" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_RUNTEST="$as_dir/$ac_word$ac_exec_ext" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - ;; -esac -fi -RUNTEST=$ac_cv_path_RUNTEST -if test -n "$RUNTEST"; then - { echo "$as_me:$LINENO: result: $RUNTEST" >&5 -echo "${ECHO_T}$RUNTEST" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - -no_itcl=true -{ echo "$as_me:$LINENO: checking for the tclsh program in tclinclude directory" >&5 -echo $ECHO_N "checking for the tclsh program in tclinclude directory... $ECHO_C" >&6; } - -# Check whether --with-tclinclude was given. 
-if test "${with_tclinclude+set}" = set; then - withval=$with_tclinclude; with_tclinclude=${withval} -else - with_tclinclude='' -fi - -if test "${ac_cv_path_tclsh+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - -if test x"${with_tclinclude}" != x ; then - if test -f ${with_tclinclude}/tclsh ; then - ac_cv_path_tclsh=`(cd ${with_tclinclude}; pwd)` - elif test -f ${with_tclinclude}/src/tclsh ; then - ac_cv_path_tclsh=`(cd ${with_tclinclude}/src; pwd)` - else - { { echo "$as_me:$LINENO: error: ${with_tclinclude} directory doesn't contain tclsh" >&5 -echo "$as_me: error: ${with_tclinclude} directory doesn't contain tclsh" >&2;} - { (exit 1); exit 1; }; } - fi -fi -fi - - -if test x"${ac_cv_path_tclsh}" = x ; then - { echo "$as_me:$LINENO: result: none" >&5 -echo "${ECHO_T}none" >&6; } - for ac_prog in tclsh8.4 tclsh8.4.8 tclsh8.4.7 tclsh8.4.6 tclsh8.4.5 tclsh8.4.4 tclsh8.4.3 tclsh8.4.2 tclsh8.4.1 tclsh8.4.0 tclsh8.3 tclsh8.3.5 tclsh8.3.4 tclsh8.3.3 tclsh8.3.2 tclsh8.3.1 tclsh8.3.0 tclsh -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ echo "$as_me:$LINENO: checking for $ac_word" >&5 -echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } -if test "${ac_cv_path_TCLSH+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - case $TCLSH in - [\\/]* | ?:[\\/]*) - ac_cv_path_TCLSH="$TCLSH" # Let the user override the test with a path. - ;; - *) - as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then - ac_cv_path_TCLSH="$as_dir/$ac_word$ac_exec_ext" - echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done -done -IFS=$as_save_IFS - - ;; -esac -fi -TCLSH=$ac_cv_path_TCLSH -if test -n "$TCLSH"; then - { echo "$as_me:$LINENO: result: $TCLSH" >&5 -echo "${ECHO_T}$TCLSH" >&6; } -else - { echo "$as_me:$LINENO: result: no" >&5 -echo "${ECHO_T}no" >&6; } -fi - - - test -n "$TCLSH" && break -done - - if test x"${TCLSH}" = x ; then - ac_cv_path_tclsh=''; - else - ac_cv_path_tclsh="${TCLSH}"; - fi -else - { echo "$as_me:$LINENO: result: ${ac_cv_path_tclsh}" >&5 -echo "${ECHO_T}${ac_cv_path_tclsh}" >&6; } - TCLSH="${ac_cv_path_tclsh}" - -fi - # Extract the first word of "zip", so it can be a program name with args. set dummy zip; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 @@ -10348,7 +10215,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10346 "configure" +#line 10213 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -22172,8 +22039,6 @@ DOXYGEN!$DOXYGEN$ac_delim GROFF!$GROFF$ac_delim GZIPBIN!$GZIPBIN$ac_delim PDFROFF!$PDFROFF$ac_delim -RUNTEST!$RUNTEST$ac_delim -TCLSH!$TCLSH$ac_delim ZIP!$ZIP$ac_delim OCAMLC!$OCAMLC$ac_delim OCAMLOPT!$OCAMLOPT$ac_delim @@ -22219,7 +22084,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html deleted file mode 100644 index 638823588b..0000000000 --- a/docs/AliasAnalysis.html +++ /dev/null @@ -1,1067 +0,0 @@ -<!DOCTYPE HTML PUBLIC 
"-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Alias Analysis Infrastructure</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - LLVM Alias Analysis Infrastructure -</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - - <li><a href="#overview"><tt>AliasAnalysis</tt> Class Overview</a> - <ul> - <li><a href="#pointers">Representation of Pointers</a></li> - <li><a href="#alias">The <tt>alias</tt> method</a></li> - <li><a href="#ModRefInfo">The <tt>getModRefInfo</tt> methods</a></li> - <li><a href="#OtherItfs">Other useful <tt>AliasAnalysis</tt> methods</a></li> - </ul> - </li> - - <li><a href="#writingnew">Writing a new <tt>AliasAnalysis</tt> Implementation</a> - <ul> - <li><a href="#passsubclasses">Different Pass styles</a></li> - <li><a href="#requiredcalls">Required initialization calls</a></li> - <li><a href="#interfaces">Interfaces which may be specified</a></li> - <li><a href="#chaining"><tt>AliasAnalysis</tt> chaining behavior</a></li> - <li><a href="#updating">Updating analysis results for transformations</a></li> - <li><a href="#implefficiency">Efficiency Issues</a></li> - <li><a href="#limitations">Limitations</a></li> - </ul> - </li> - - <li><a href="#using">Using alias analysis results</a> - <ul> - <li><a href="#memdep">Using the <tt>MemoryDependenceAnalysis</tt> Pass</a></li> - <li><a href="#ast">Using the <tt>AliasSetTracker</tt> class</a></li> - <li><a href="#direct">Using the <tt>AliasAnalysis</tt> interface directly</a></li> - </ul> - </li> - - <li><a href="#exist">Existing alias analysis implementations and clients</a> - <ul> - <li><a href="#impls">Available <tt>AliasAnalysis</tt> implementations</a></li> - <li><a href="#aliasanalysis-xforms">Alias analysis driven transformations</a></li> - <li><a href="#aliasanalysis-debug">Clients for debugging and evaluation of - 
implementations</a></li> - </ul> - </li> - <li><a href="#memdep">Memory Dependence Analysis</a></li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p> -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="introduction">Introduction</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt -to determine whether or not two pointers ever can point to the same object in -memory. There are many different algorithms for alias analysis and many -different ways of classifying them: flow-sensitive vs flow-insensitive, -context-sensitive vs context-insensitive, field-sensitive vs field-insensitive, -unification-based vs subset-based, etc. Traditionally, alias analyses respond -to a query with a <a href="#MustMayNo">Must, May, or No</a> alias response, -indicating that two pointers always point to the same object, might point to the -same object, or are known to never point to the same object.</p> - -<p>The LLVM <a -href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a> -class is the primary interface used by clients and implementations of alias -analyses in the LLVM system. This class is the common interface between clients -of alias analysis information and the implementations providing it, and is -designed to support a wide range of implementations and clients (but currently -all clients are assumed to be flow-insensitive). In addition to simple alias -analysis information, this class exposes Mod/Ref information from those -implementations which can provide it, allowing for powerful analyses and -transformations to work well together.</p> - -<p>This document contains information necessary to successfully implement this -interface, use it, and to test both sides. 
It also explains some of the finer -points about what exactly results mean. If you feel that something is unclear -or should be added, please <a href="mailto:sabre@nondot.org">let me -know</a>.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="overview"><tt>AliasAnalysis</tt> Class Overview</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The <a -href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a> -class defines the interface that the various alias analysis implementations -should support. This class exports two important enums: <tt>AliasResult</tt> -and <tt>ModRefResult</tt> which represent the result of an alias query or a -mod/ref query, respectively.</p> - -<p>The <tt>AliasAnalysis</tt> interface exposes information about memory, -represented in several different ways. In particular, memory objects are -represented as a starting address and size, and function calls are represented -as the actual <tt>call</tt> or <tt>invoke</tt> instructions that performs the -call. The <tt>AliasAnalysis</tt> interface also exposes some helper methods -which allow you to get mod/ref information for arbitrary instructions.</p> - -<p>All <tt>AliasAnalysis</tt> interfaces require that in queries involving -multiple values, values which are not -<a href="LangRef.html#constants">constants</a> are all defined within the -same function.</p> - -<!-- ======================================================================= --> -<h3> - <a name="pointers">Representation of Pointers</a> -</h3> - -<div> - -<p>Most importantly, the <tt>AliasAnalysis</tt> class provides several methods -which are used to query whether or not two memory objects alias, whether -function calls can modify or read a memory object, etc. 
For all of these -queries, memory objects are represented as a pair of their starting address (a -symbolic LLVM <tt>Value*</tt>) and a static size.</p> - -<p>Representing memory objects as a starting address and a size is critically -important for correct Alias Analyses. For example, consider this (silly, but -possible) C code:</p> - -<div class="doc_code"> -<pre> -int i; -char C[2]; -char A[10]; -/* ... */ -for (i = 0; i != 10; ++i) { - C[0] = A[i]; /* One byte store */ - C[1] = A[9-i]; /* One byte store */ -} -</pre> -</div> - -<p>In this case, the <tt>basicaa</tt> pass will disambiguate the stores to -<tt>C[0]</tt> and <tt>C[1]</tt> because they are accesses to two distinct -locations one byte apart, and the accesses are each one byte. In this case, the -LICM pass can use store motion to remove the stores from the loop. In -constrast, the following code:</p> - -<div class="doc_code"> -<pre> -int i; -char C[2]; -char A[10]; -/* ... */ -for (i = 0; i != 10; ++i) { - ((short*)C)[0] = A[i]; /* Two byte store! */ - C[1] = A[9-i]; /* One byte store */ -} -</pre> -</div> - -<p>In this case, the two stores to C do alias each other, because the access to -the <tt>&C[0]</tt> element is a two byte access. If size information wasn't -available in the query, even the first case would have to conservatively assume -that the accesses alias.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="alias">The <tt>alias</tt> method</a> -</h3> - -<div> -<p>The <tt>alias</tt> method is the primary interface used to determine whether -or not two memory objects alias each other. 
It takes two memory objects as -input and returns MustAlias, PartialAlias, MayAlias, or NoAlias as -appropriate.</p> - -<p>Like all <tt>AliasAnalysis</tt> interfaces, the <tt>alias</tt> method requires -that either the two pointer values be defined within the same function, or at -least one of the values is a <a href="LangRef.html#constants">constant</a>.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="MustMayNo">Must, May, and No Alias Responses</a> -</h4> - -<div> -<p>The NoAlias response may be used when there is never an immediate dependence -between any memory reference <i>based</i> on one pointer and any memory -reference <i>based</i> the other. The most obvious example is when the two -pointers point to non-overlapping memory ranges. Another is when the two -pointers are only ever used for reading memory. Another is when the memory is -freed and reallocated between accesses through one pointer and accesses through -the other -- in this case, there is a dependence, but it's mediated by the free -and reallocation.</p> - -<p>As an exception to this is with the -<a href="LangRef.html#noalias"><tt>noalias</tt></a> keyword; the "irrelevant" -dependencies are ignored.</p> - -<p>The MayAlias response is used whenever the two pointers might refer to the -same object.</p> - -<p>The PartialAlias response is used when the two memory objects are known -to be overlapping in some way, but do not start at the same address.</p> - -<p>The MustAlias response may only be returned if the two memory objects are -guaranteed to always start at exactly the same location. 
A MustAlias response -implies that the pointers compare equal.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="ModRefInfo">The <tt>getModRefInfo</tt> methods</a> -</h3> - -<div> - -<p>The <tt>getModRefInfo</tt> methods return information about whether the -execution of an instruction can read or modify a memory location. Mod/Ref -information is always conservative: if an instruction <b>might</b> read or write -a location, ModRef is returned.</p> - -<p>The <tt>AliasAnalysis</tt> class also provides a <tt>getModRefInfo</tt> -method for testing dependencies between function calls. This method takes two -call sites (CS1 & CS2), returns NoModRef if neither call writes to memory -read or written by the other, Ref if CS1 reads memory written by CS2, Mod if CS1 -writes to memory read or written by CS2, or ModRef if CS1 might read or write -memory written to by CS2. Note that this relation is not commutative.</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="OtherItfs">Other useful <tt>AliasAnalysis</tt> methods</a> -</h3> - -<div> - -<p> -Several other tidbits of information are often collected by various alias -analysis implementations and can be put to good use by various clients. -</p> - -<!-- _______________________________________________________________________ --> -<h4> - The <tt>pointsToConstantMemory</tt> method -</h4> - -<div> - -<p>The <tt>pointsToConstantMemory</tt> method returns true if and only if the -analysis can prove that the pointer only points to unchanging memory locations -(functions, constant global variables, and the null pointer). 
This information -can be used to refine mod/ref information: it is impossible for an unchanging -memory location to be modified.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="simplemodref">The <tt>doesNotAccessMemory</tt> and - <tt>onlyReadsMemory</tt> methods</a> -</h4> - -<div> - -<p>These methods are used to provide very simple mod/ref information for -function calls. The <tt>doesNotAccessMemory</tt> method returns true for a -function if the analysis can prove that the function never reads or writes to -memory, or if the function only reads from constant memory. Functions with this -property are side-effect free and only depend on their input arguments, allowing -them to be eliminated if they form common subexpressions or be hoisted out of -loops. Many common functions behave this way (e.g., <tt>sin</tt> and -<tt>cos</tt>) but many others do not (e.g., <tt>acos</tt>, which modifies the -<tt>errno</tt> variable).</p> - -<p>The <tt>onlyReadsMemory</tt> method returns true for a function if analysis -can prove that (at most) the function only reads from non-volatile memory. -Functions with this property are side-effect free, only depending on their input -arguments and the state of memory when they are called. This property allows -calls to these functions to be eliminated and moved around, as long as there is -no store instruction that changes the contents of memory. Note that all -functions that satisfy the <tt>doesNotAccessMemory</tt> method also satisfies -<tt>onlyReadsMemory</tt>.</p> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="writingnew">Writing a new <tt>AliasAnalysis</tt> Implementation</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Writing a new alias analysis implementation for LLVM is quite -straight-forward. 
There are already several implementations that you can use -for examples, and the following information should help fill in any details. -For a examples, take a look at the <a href="#impls">various alias analysis -implementations</a> included with LLVM.</p> - -<!-- ======================================================================= --> -<h3> - <a name="passsubclasses">Different Pass styles</a> -</h3> - -<div> - -<p>The first step to determining what type of <a -href="WritingAnLLVMPass.html">LLVM pass</a> you need to use for your Alias -Analysis. As is the case with most other analyses and transformations, the -answer should be fairly obvious from what type of problem you are trying to -solve:</p> - -<ol> - <li>If you require interprocedural analysis, it should be a - <tt>Pass</tt>.</li> - <li>If you are a function-local analysis, subclass <tt>FunctionPass</tt>.</li> - <li>If you don't need to look at the program at all, subclass - <tt>ImmutablePass</tt>.</li> -</ol> - -<p>In addition to the pass that you subclass, you should also inherit from the -<tt>AliasAnalysis</tt> interface, of course, and use the -<tt>RegisterAnalysisGroup</tt> template to register as an implementation of -<tt>AliasAnalysis</tt>.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="requiredcalls">Required initialization calls</a> -</h3> - -<div> - -<p>Your subclass of <tt>AliasAnalysis</tt> is required to invoke two methods on -the <tt>AliasAnalysis</tt> base class: <tt>getAnalysisUsage</tt> and -<tt>InitializeAliasAnalysis</tt>. In particular, your implementation of -<tt>getAnalysisUsage</tt> should explicitly call into the -<tt>AliasAnalysis::getAnalysisUsage</tt> method in addition to doing any -declaring any pass dependencies your pass has. 
Thus you should have something -like this:</p> - -<div class="doc_code"> -<pre> -void getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - <i>// declare your dependencies here.</i> -} -</pre> -</div> - -<p>Additionally, your must invoke the <tt>InitializeAliasAnalysis</tt> method -from your analysis run method (<tt>run</tt> for a <tt>Pass</tt>, -<tt>runOnFunction</tt> for a <tt>FunctionPass</tt>, or <tt>InitializePass</tt> -for an <tt>ImmutablePass</tt>). For example (as part of a <tt>Pass</tt>):</p> - -<div class="doc_code"> -<pre> -bool run(Module &M) { - InitializeAliasAnalysis(this); - <i>// Perform analysis here...</i> - return false; -} -</pre> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="interfaces">Interfaces which may be specified</a> -</h3> - -<div> - -<p>All of the <a -href="/doxygen/classllvm_1_1AliasAnalysis.html"><tt>AliasAnalysis</tt></a> -virtual methods default to providing <a href="#chaining">chaining</a> to another -alias analysis implementation, which ends up returning conservatively correct -information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries -respectively). Depending on the capabilities of the analysis you are -implementing, you just override the interfaces you can improve.</p> - -</div> - - - -<!-- ======================================================================= --> -<h3> - <a name="chaining"><tt>AliasAnalysis</tt> chaining behavior</a> -</h3> - -<div> - -<p>With only one special exception (the <a href="#no-aa"><tt>no-aa</tt></a> -pass) every alias analysis pass chains to another alias analysis -implementation (for example, the user can specify "<tt>-basicaa -ds-aa --licm</tt>" to get the maximum benefit from both alias -analyses). The alias analysis class automatically takes care of most of this -for methods that you don't override. 
For methods that you do override, in code -paths that return a conservative MayAlias or Mod/Ref result, simply return -whatever the superclass computes. For example:</p> - -<div class="doc_code"> -<pre> -AliasAnalysis::AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - if (...) - return NoAlias; - ... - - <i>// Couldn't determine a must or no-alias result.</i> - return AliasAnalysis::alias(V1, V1Size, V2, V2Size); -} -</pre> -</div> - -<p>In addition to analysis queries, you must make sure to unconditionally pass -LLVM <a href="#updating">update notification</a> methods to the superclass as -well if you override them, which allows all alias analyses in a change to be -updated.</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="updating">Updating analysis results for transformations</a> -</h3> - -<div> -<p> -Alias analysis information is initially computed for a static snapshot of the -program, but clients will use this information to make transformations to the -code. All but the most trivial forms of alias analysis will need to have their -analysis results updated to reflect the changes made by these transformations. -</p> - -<p> -The <tt>AliasAnalysis</tt> interface exposes four methods which are used to -communicate program changes from the clients to the analysis implementations. -Various alias analysis implementations should use these methods to ensure that -their internal data structures are kept up-to-date as the program changes (for -example, when an instruction is deleted), and clients of alias analysis must be -sure to call these interfaces appropriately. 
-</p> - -<!-- _______________________________________________________________________ --> -<h4>The <tt>deleteValue</tt> method</h4> - -<div> -The <tt>deleteValue</tt> method is called by transformations when they remove an -instruction or any other value from the program (including values that do not -use pointers). Typically alias analyses keep data structures that have entries -for each value in the program. When this method is called, they should remove -any entries for the specified value, if they exist. -</div> - -<!-- _______________________________________________________________________ --> -<h4>The <tt>copyValue</tt> method</h4> - -<div> -The <tt>copyValue</tt> method is used when a new value is introduced into the -program. There is no way to introduce a value into the program that did not -exist before (this doesn't make sense for a safe compiler transformation), so -this is the only way to introduce a new value. This method indicates that the -new value has exactly the same properties as the value being copied. -</div> - -<!-- _______________________________________________________________________ --> -<h4>The <tt>replaceWithNewValue</tt> method</h4> - -<div> -This method is a simple helper method that is provided to make clients easier to -use. It is implemented by copying the old analysis information to the new -value, then deleting the old value. This method cannot be overridden by alias -analysis implementations. -</div> - -<!-- _______________________________________________________________________ --> -<h4>The <tt>addEscapingUse</tt> method</h4> - -<div> -<p>The <tt>addEscapingUse</tt> method is used when the uses of a pointer -value have changed in ways that may invalidate precomputed analysis information. 
-Implementations may either use this callback to provide conservative responses -for points whose uses have change since analysis time, or may recompute some -or all of their internal state to continue providing accurate responses.</p> - -<p>In general, any new use of a pointer value is considered an escaping use, -and must be reported through this callback, <em>except</em> for the -uses below:</p> - -<ul> - <li>A <tt>bitcast</tt> or <tt>getelementptr</tt> of the pointer</li> - <li>A <tt>store</tt> through the pointer (but not a <tt>store</tt> - <em>of</em> the pointer)</li> - <li>A <tt>load</tt> through the pointer</li> -</ul> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="implefficiency">Efficiency Issues</a> -</h3> - -<div> - -<p>From the LLVM perspective, the only thing you need to do to provide an -efficient alias analysis is to make sure that alias analysis <b>queries</b> are -serviced quickly. The actual calculation of the alias analysis results (the -"run" method) is only performed once, but many (perhaps duplicate) queries may -be performed. Because of this, try to move as much computation to the run -method as possible (within reason).</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="limitations">Limitations</a> -</h3> - -<div> - -<p>The AliasAnalysis infrastructure has several limitations which make -writing a new <tt>AliasAnalysis</tt> implementation difficult.</p> - -<p>There is no way to override the default alias analysis. It would -be very useful to be able to do something like "opt -my-aa -O2" and -have it use -my-aa for all passes which need AliasAnalysis, but there -is currently no support for that, short of changing the source code -and recompiling. 
Similarly, there is also no way of setting a chain -of analyses as the default.</p> - -<p>There is no way for transform passes to declare that they preserve -<tt>AliasAnalysis</tt> implementations. The <tt>AliasAnalysis</tt> -interface includes <tt>deleteValue</tt> and <tt>copyValue</tt> methods -which are intended to allow a pass to keep an AliasAnalysis consistent, -however there's no way for a pass to declare in its -<tt>getAnalysisUsage</tt> that it does so. Some passes attempt to use -<tt>AU.addPreserved<AliasAnalysis></tt>, however this doesn't -actually have any effect.</p> - -<p><tt>AliasAnalysisCounter</tt> (<tt>-count-aa</tt>) and <tt>AliasDebugger</tt> -(<tt>-debug-aa</tt>) are implemented as <tt>ModulePass</tt> classes, so if your -alias analysis uses <tt>FunctionPass</tt>, it won't be able to use -these utilities. If you try to use them, the pass manager will -silently route alias analysis queries directly to -<tt>BasicAliasAnalysis</tt> instead.</p> - -<p>Similarly, the <tt>opt -p</tt> option introduces <tt>ModulePass</tt> -passes between each pass, which prevents the use of <tt>FunctionPass</tt> -alias analysis passes.</p> - -<p>The <tt>AliasAnalysis</tt> API does have functions for notifying -implementations when values are deleted or copied, however these -aren't sufficient. There are many other ways that LLVM IR can be -modified which could be relevant to <tt>AliasAnalysis</tt> -implementations which can not be expressed.</p> - -<p>The <tt>AliasAnalysisDebugger</tt> utility seems to suggest that -<tt>AliasAnalysis</tt> implementations can expect that they will be -informed of any relevant <tt>Value</tt> before it appears in an -alias query. 
However, popular clients such as <tt>GVN</tt> don't -support this, and are known to trigger errors when run with the -<tt>AliasAnalysisDebugger</tt>.</p> - -<p>Due to several of the above limitations, the most obvious use for -the <tt>AliasAnalysisCounter</tt> utility, collecting stats on all -alias queries in a compilation, doesn't work, even if the -<tt>AliasAnalysis</tt> implementations don't use <tt>FunctionPass</tt>. -There's no way to set a default, much less a default sequence, -and there's no way to preserve it.</p> - -<p>The <tt>AliasSetTracker</tt> class (which is used by <tt>LICM</tt> -makes a non-deterministic number of alias queries. This can cause stats -collected by <tt>AliasAnalysisCounter</tt> to have fluctuations among -identical runs, for example. Another consequence is that debugging -techniques involving pausing execution after a predetermined number -of queries can be unreliable.</p> - -<p>Many alias queries can be reformulated in terms of other alias -queries. When multiple <tt>AliasAnalysis</tt> queries are chained together, -it would make sense to start those queries from the beginning of the chain, -with care taken to avoid infinite looping, however currently an -implementation which wants to do this can only start such queries -from itself.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="using">Using alias analysis results</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>There are several different ways to use alias analysis results. In order of -preference, these are...</p> - -<!-- ======================================================================= --> -<h3> - <a name="memdep">Using the <tt>MemoryDependenceAnalysis</tt> Pass</a> -</h3> - -<div> - -<p>The <tt>memdep</tt> pass uses alias analysis to provide high-level dependence -information about memory-using instructions. 
This will tell you which store -feeds into a load, for example. It uses caching and other techniques to be -efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations. -</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="ast">Using the <tt>AliasSetTracker</tt> class</a> -</h3> - -<div> - -<p>Many transformations need information about alias <b>sets</b> that are active -in some scope, rather than information about pairwise aliasing. The <tt><a -href="/doxygen/classllvm_1_1AliasSetTracker.html">AliasSetTracker</a></tt> class -is used to efficiently build these Alias Sets from the pairwise alias analysis -information provided by the <tt>AliasAnalysis</tt> interface.</p> - -<p>First you initialize the AliasSetTracker by using the "<tt>add</tt>" methods -to add information about various potentially aliasing instructions in the scope -you are interested in. Once all of the alias sets are completed, your pass -should simply iterate through the constructed alias sets, using the -<tt>AliasSetTracker</tt> <tt>begin()</tt>/<tt>end()</tt> methods.</p> - -<p>The <tt>AliasSet</tt>s formed by the <tt>AliasSetTracker</tt> are guaranteed -to be disjoint, calculate mod/ref information and volatility for the set, and -keep track of whether or not all of the pointers in the set are Must aliases. -The AliasSetTracker also makes sure that sets are properly folded due to call -instructions, and can provide a list of pointers in each set.</p> - -<p>As an example user of this, the <a href="/doxygen/structLICM.html">Loop -Invariant Code Motion</a> pass uses <tt>AliasSetTracker</tt>s to calculate alias -sets for each loop nest. If an <tt>AliasSet</tt> in a loop is not modified, -then all load instructions from that set may be hoisted out of the loop. 
If any -alias sets are stored to <b>and</b> are must alias sets, then the stores may be -sunk to outside of the loop, promoting the memory location to a register for the -duration of the loop nest. Both of these transformations only apply if the -pointer argument is loop-invariant.</p> - -<!-- _______________________________________________________________________ --> -<h4> - The AliasSetTracker implementation -</h4> - -<div> - -<p>The AliasSetTracker class is implemented to be as efficient as possible. It -uses the union-find algorithm to efficiently merge AliasSets when a pointer is -inserted into the AliasSetTracker that aliases multiple sets. The primary data -structure is a hash table mapping pointers to the AliasSet they are in.</p> - -<p>The AliasSetTracker class must maintain a list of all of the LLVM Value*'s -that are in each AliasSet. Since the hash table already has entries for each -LLVM Value* of interest, the AliasesSets thread the linked list through these -hash-table nodes to avoid having to allocate memory unnecessarily, and to make -merging alias sets extremely efficient (the linked list merge is constant time). -</p> - -<p>You shouldn't need to understand these details if you are just a client of -the AliasSetTracker, but if you look at the code, hopefully this brief -description will help make sense of why things are designed the way they -are.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="direct">Using the <tt>AliasAnalysis</tt> interface directly</a> -</h3> - -<div> - -<p>If neither of these utility class are what your pass needs, you should use -the interfaces exposed by the <tt>AliasAnalysis</tt> class directly. 
Try to use -the higher-level methods when possible (e.g., use mod/ref information instead of -the <a href="#alias"><tt>alias</tt></a> method directly if possible) to get the -best precision and efficiency.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="exist">Existing alias analysis implementations and clients</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If you're going to be working with the LLVM alias analysis infrastructure, -you should know what clients and implementations of alias analysis are -available. In particular, if you are implementing an alias analysis, you should -be aware of the <a href="#aliasanalysis-debug">the clients</a> that are useful -for monitoring and evaluating different implementations.</p> - -<!-- ======================================================================= --> -<h3> - <a name="impls">Available <tt>AliasAnalysis</tt> implementations</a> -</h3> - -<div> - -<p>This section lists the various implementations of the <tt>AliasAnalysis</tt> -interface. With the exception of the <a href="#no-aa"><tt>-no-aa</tt></a> -implementation, all of these <a href="#chaining">chain</a> to other alias -analysis implementations.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="no-aa">The <tt>-no-aa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-no-aa</tt> pass is just like what it sounds: an alias analysis that -never returns any useful information. 
This pass can be useful if you think that -alias analysis is doing something wrong and are trying to narrow down a -problem.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="basic-aa">The <tt>-basicaa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-basicaa</tt> pass is an aggressive local analysis that "knows" -many important facts:</p> - -<ul> -<li>Distinct globals, stack allocations, and heap allocations can never - alias.</li> -<li>Globals, stack allocations, and heap allocations never alias the null - pointer.</li> -<li>Different fields of a structure do not alias.</li> -<li>Indexes into arrays with statically differing subscripts cannot alias.</li> -<li>Many common standard C library functions <a - href="#simplemodref">never access memory or only read memory</a>.</li> -<li>Pointers that obviously point to constant globals - "<tt>pointToConstantMemory</tt>".</li> -<li>Function calls can not modify or references stack allocations if they never - escape from the function that allocates them (a common case for automatic - arrays).</li> -</ul> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="globalsmodref">The <tt>-globalsmodref-aa</tt> pass</a> -</h4> - -<div> - -<p>This pass implements a simple context-sensitive mod/ref and alias analysis -for internal global variables that don't "have their address taken". If a -global does not have its address taken, the pass knows that no pointers alias -the global. This pass also keeps track of functions that it knows never access -memory or never read memory. This allows certain optimizations (e.g. GVN) to -eliminate call instructions entirely. -</p> - -<p>The real power of this pass is that it provides context-sensitive mod/ref -information for call instructions. 
This allows the optimizer to know that -calls to a function do not clobber or read the value of the global, allowing -loads and stores to be eliminated.</p> - -<p>Note that this pass is somewhat limited in its scope (only support -non-address taken globals), but is very quick analysis.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="steens-aa">The <tt>-steens-aa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-steens-aa</tt> pass implements a variation on the well-known -"Steensgaard's algorithm" for interprocedural alias analysis. Steensgaard's -algorithm is a unification-based, flow-insensitive, context-insensitive, and -field-insensitive alias analysis that is also very scalable (effectively linear -time).</p> - -<p>The LLVM <tt>-steens-aa</tt> pass implements a "speculatively -field-<b>sensitive</b>" version of Steensgaard's algorithm using the Data -Structure Analysis framework. This gives it substantially more precision than -the standard algorithm while maintaining excellent analysis scalability.</p> - -<p>Note that <tt>-steens-aa</tt> is available in the optional "poolalloc" -module, it is not part of the LLVM core.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ds-aa">The <tt>-ds-aa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-ds-aa</tt> pass implements the full Data Structure Analysis -algorithm. Data Structure Analysis is a modular unification-based, -flow-insensitive, context-<b>sensitive</b>, and speculatively -field-<b>sensitive</b> alias analysis that is also quite scalable, usually at -O(n*log(n)).</p> - -<p>This algorithm is capable of responding to a full variety of alias analysis -queries, and can provide context-sensitive mod/ref information as well. 
The -only major facility not implemented so far is support for must-alias -information.</p> - -<p>Note that <tt>-ds-aa</tt> is available in the optional "poolalloc" -module, it is not part of the LLVM core.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scev-aa">The <tt>-scev-aa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-scev-aa</tt> pass implements AliasAnalysis queries by -translating them into ScalarEvolution queries. This gives it a -more complete understanding of <tt>getelementptr</tt> instructions -and loop induction variables than other alias analyses have.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="aliasanalysis-xforms">Alias analysis driven transformations</a> -</h3> - -<div> -LLVM includes several alias-analysis driven transformations which can be used -with any of the implementations above. - -<!-- _______________________________________________________________________ --> -<h4> - <a name="adce">The <tt>-adce</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-adce</tt> pass, which implements Aggressive Dead Code Elimination -uses the <tt>AliasAnalysis</tt> interface to delete calls to functions that do -not have side-effects and are not used.</p> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4> - <a name="licm">The <tt>-licm</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-licm</tt> pass implements various Loop Invariant Code Motion related -transformations. 
It uses the <tt>AliasAnalysis</tt> interface for several -different transformations:</p> - -<ul> -<li>It uses mod/ref information to hoist or sink load instructions out of loops -if there are no instructions in the loop that modifies the memory loaded.</li> - -<li>It uses mod/ref information to hoist function calls out of loops that do not -write to memory and are loop-invariant.</li> - -<li>If uses alias information to promote memory objects that are loaded and -stored to in loops to live in a register instead. It can do this if there are -no may aliases to the loaded/stored memory location.</li> -</ul> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="argpromotion">The <tt>-argpromotion</tt> pass</a> -</h4> - -<div> -<p> -The <tt>-argpromotion</tt> pass promotes by-reference arguments to be passed in -by-value instead. In particular, if pointer arguments are only loaded from it -passes in the value loaded instead of the address to the function. This pass -uses alias information to make sure that the value loaded from the argument -pointer is not modified between the entry of the function and any load of the -pointer.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="gvn">The <tt>-gvn</tt>, <tt>-memcpyopt</tt>, and <tt>-dse</tt> - passes</a> -</h4> - -<div> - -<p>These passes use AliasAnalysis information to reason about loads and stores. -</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="aliasanalysis-debug">Clients for debugging and evaluation of - implementations</a> -</h3> - -<div> - -<p>These passes are useful for evaluating the various alias analysis -implementations. 
You can use them with commands like '<tt>opt -ds-aa --aa-eval foo.bc -disable-output -stats</tt>'.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="print-alias-sets">The <tt>-print-alias-sets</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-print-alias-sets</tt> pass is exposed as part of the -<tt>opt</tt> tool to print out the Alias Sets formed by the <a -href="#ast"><tt>AliasSetTracker</tt></a> class. This is useful if you're using -the <tt>AliasSetTracker</tt> class. To use it, use something like:</p> - -<div class="doc_code"> -<pre> -% opt -ds-aa -print-alias-sets -disable-output -</pre> -</div> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4> - <a name="count-aa">The <tt>-count-aa</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-count-aa</tt> pass is useful to see how many queries a particular -pass is making and what responses are returned by the alias analysis. As an -example,</p> - -<div class="doc_code"> -<pre> -% opt -basicaa -count-aa -ds-aa -count-aa -licm -</pre> -</div> - -<p>will print out how many queries (and what responses are returned) by the -<tt>-licm</tt> pass (of the <tt>-ds-aa</tt> pass) and how many queries are made -of the <tt>-basicaa</tt> pass by the <tt>-ds-aa</tt> pass. This can be useful -when debugging a transformation or an alias analysis implementation.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="aa-eval">The <tt>-aa-eval</tt> pass</a> -</h4> - -<div> - -<p>The <tt>-aa-eval</tt> pass simply iterates through all pairs of pointers in a -function and asks an alias analysis whether or not the pointers alias. This -gives an indication of the precision of the alias analysis. 
Statistics are -printed indicating the percent of no/may/must aliases found (a more precise -algorithm will have a lower number of may aliases).</p> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="memdep">Memory Dependence Analysis</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If you're just looking to be a client of alias analysis information, consider -using the Memory Dependence Analysis interface instead. MemDep is a lazy, -caching layer on top of alias analysis that is able to answer the question of -what preceding memory operations a given instruction depends on, either at an -intra- or inter-block level. Because of its laziness and caching -policy, using MemDep can be a significant performance win over accessing alias -analysis directly.</p> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst new file mode 100644 index 0000000000..2d4f2914ee --- /dev/null +++ b/docs/AliasAnalysis.rst @@ -0,0 +1,702 @@ +.. _alias_analysis: + +================================== +LLVM Alias Analysis Infrastructure +================================== + +.. 
contents:: + :local: + +Introduction +============ + +Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt to +determine whether or not two pointers ever can point to the same object in +memory. There are many different algorithms for alias analysis and many +different ways of classifying them: flow-sensitive vs. flow-insensitive, +context-sensitive vs. context-insensitive, field-sensitive +vs. field-insensitive, unification-based vs. subset-based, etc. Traditionally, +alias analyses respond to a query with a `Must, May, or No`_ alias response, +indicating that two pointers always point to the same object, might point to the +same object, or are known to never point to the same object. + +The LLVM `AliasAnalysis +<http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`__ class is the +primary interface used by clients and implementations of alias analyses in the +LLVM system. This class is the common interface between clients of alias +analysis information and the implementations providing it, and is designed to +support a wide range of implementations and clients (but currently all clients +are assumed to be flow-insensitive). In addition to simple alias analysis +information, this class exposes Mod/Ref information from those implementations +which can provide it, allowing for powerful analyses and transformations to work +well together. + +This document contains information necessary to successfully implement this +interface, use it, and to test both sides. It also explains some of the finer +points about what exactly results mean. If you feel that something is unclear +or should be added, please `let me know <mailto:sabre@nondot.org>`_. + +``AliasAnalysis`` Class Overview +================================ + +The `AliasAnalysis <http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`__ +class defines the interface that the various alias analysis implementations +should support. 
This class exports two important enums: ``AliasResult`` and
+``ModRefResult`` which represent the result of an alias query or a mod/ref
+query, respectively.
+
+The ``AliasAnalysis`` interface exposes information about memory, represented in
+several different ways. In particular, memory objects are represented as a
+starting address and size, and function calls are represented as the actual
+``call`` or ``invoke`` instructions that perform the call. The
+``AliasAnalysis`` interface also exposes some helper methods which allow you to
+get mod/ref information for arbitrary instructions.
+
+All ``AliasAnalysis`` interfaces require that in queries involving multiple
+values, values which are not `constants <LangRef.html#constants>`_ are all
+defined within the same function.
+
+Representation of Pointers
+--------------------------
+
+Most importantly, the ``AliasAnalysis`` class provides several methods which are
+used to query whether or not two memory objects alias, whether function calls
+can modify or read a memory object, etc. For all of these queries, memory
+objects are represented as a pair of their starting address (a symbolic LLVM
+``Value*``) and a static size.
+
+Representing memory objects as a starting address and a size is critically
+important for correct Alias Analyses. For example, consider this (silly, but
+possible) C code:
+
+.. code-block:: c++
+
+  int i;
+  char C[2];
+  char A[10];
+  /* ... */
+  for (i = 0; i != 10; ++i) {
+    C[0] = A[i];          /* One byte store */
+    C[1] = A[9-i];        /* One byte store */
+  }
+
+In this case, the ``basicaa`` pass will disambiguate the stores to ``C[0]`` and
+``C[1]`` because they are accesses to two distinct locations one byte apart, and
+the accesses are each one byte. In this case, the Loop Invariant Code Motion
+(LICM) pass can use store motion to remove the stores from the loop. In
+contrast, the following code:
+
+.. code-block:: c++
+
+  int i;
+  char C[2];
+  char A[10];
+  /* ... 
*/
+  for (i = 0; i != 10; ++i) {
+    ((short*)C)[0] = A[i];          /* Two byte store! */
+    C[1] = A[9-i];          /* One byte store */
+  }
+
+In this case, the two stores to C do alias each other, because the access to the
+``&C[0]`` element is a two byte access. If size information wasn't available in
+the query, even the first case would have to conservatively assume that the
+accesses alias.
+
+.. _alias:
+
+The ``alias`` method
+--------------------
+
+The ``alias`` method is the primary interface used to determine whether or not
+two memory objects alias each other. It takes two memory objects as input and
+returns MustAlias, PartialAlias, MayAlias, or NoAlias as appropriate.
+
+Like all ``AliasAnalysis`` interfaces, the ``alias`` method requires that either
+the two pointer values be defined within the same function, or at least one of
+the values is a `constant <LangRef.html#constants>`_.
+
+.. _Must, May, or No:
+
+Must, May, and No Alias Responses
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``NoAlias`` response may be used when there is never an immediate dependence
+between any memory reference *based* on one pointer and any memory reference
+*based* on the other. The most obvious example is when the two pointers point to
+non-overlapping memory ranges. Another is when the two pointers are only ever
+used for reading memory. Another is when the memory is freed and reallocated
+between accesses through one pointer and accesses through the other --- in this
+case, there is a dependence, but it's mediated by the free and reallocation.
+
+An exception to this is with the `noalias <LangRef.html#noalias>`_ keyword;
+the "irrelevant" dependencies are ignored.
+
+The ``MayAlias`` response is used whenever the two pointers might refer to the
+same object.
+
+The ``PartialAlias`` response is used when the two memory objects are known to
+be overlapping in some way, but do not start at the same address. 
+ +The ``MustAlias`` response may only be returned if the two memory objects are +guaranteed to always start at exactly the same location. A ``MustAlias`` +response implies that the pointers compare equal. + +The ``getModRefInfo`` methods +----------------------------- + +The ``getModRefInfo`` methods return information about whether the execution of +an instruction can read or modify a memory location. Mod/Ref information is +always conservative: if an instruction **might** read or write a location, +``ModRef`` is returned. + +The ``AliasAnalysis`` class also provides a ``getModRefInfo`` method for testing +dependencies between function calls. This method takes two call sites (``CS1`` +& ``CS2``), returns ``NoModRef`` if neither call writes to memory read or +written by the other, ``Ref`` if ``CS1`` reads memory written by ``CS2``, +``Mod`` if ``CS1`` writes to memory read or written by ``CS2``, or ``ModRef`` if +``CS1`` might read or write memory written to by ``CS2``. Note that this +relation is not commutative. + +Other useful ``AliasAnalysis`` methods +-------------------------------------- + +Several other tidbits of information are often collected by various alias +analysis implementations and can be put to good use by various clients. + +The ``pointsToConstantMemory`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``pointsToConstantMemory`` method returns true if and only if the analysis +can prove that the pointer only points to unchanging memory locations +(functions, constant global variables, and the null pointer). This information +can be used to refine mod/ref information: it is impossible for an unchanging +memory location to be modified. + +.. _never access memory or only read memory: + +The ``doesNotAccessMemory`` and ``onlyReadsMemory`` methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These methods are used to provide very simple mod/ref information for function +calls. 
The ``doesNotAccessMemory`` method returns true for a function if the
+analysis can prove that the function never reads or writes to memory, or if the
+function only reads from constant memory. Functions with this property are
+side-effect free and only depend on their input arguments, allowing them to be
+eliminated if they form common subexpressions or be hoisted out of loops. Many
+common functions behave this way (e.g., ``sin`` and ``cos``) but many others do
+not (e.g., ``acos``, which modifies the ``errno`` variable).
+
+The ``onlyReadsMemory`` method returns true for a function if analysis can prove
+that (at most) the function only reads from non-volatile memory. Functions with
+this property are side-effect free, only depending on their input arguments and
+the state of memory when they are called. This property allows calls to these
+functions to be eliminated and moved around, as long as there is no store
+instruction that changes the contents of memory. Note that all functions that
+satisfy the ``doesNotAccessMemory`` method also satisfy ``onlyReadsMemory``.
+
+Writing a new ``AliasAnalysis`` Implementation
+==============================================
+
+Writing a new alias analysis implementation for LLVM is quite straight-forward.
+There are already several implementations that you can use for examples, and the
+following information should help fill in any details. For examples, take a
+look at the `various alias analysis implementations`_ included with LLVM.
+
+Different Pass styles
+---------------------
+
+The first step is to determine what type of `LLVM pass <WritingAnLLVMPass.html>`_
+you need to use for your Alias Analysis. As is the case with most other
+analyses and transformations, the answer should be fairly obvious from what type
+of problem you are trying to solve:
+
+#. If you require interprocedural analysis, it should be a ``Pass``.
+#. If you are a function-local analysis, subclass ``FunctionPass``.
+#. 
If you don't need to look at the program at all, subclass ``ImmutablePass``.
+
+In addition to the pass that you subclass, you should also inherit from the
+``AliasAnalysis`` interface, of course, and use the ``RegisterAnalysisGroup``
+template to register as an implementation of ``AliasAnalysis``.
+
+Required initialization calls
+-----------------------------
+
+Your subclass of ``AliasAnalysis`` is required to invoke two methods on the
+``AliasAnalysis`` base class: ``getAnalysisUsage`` and
+``InitializeAliasAnalysis``. In particular, your implementation of
+``getAnalysisUsage`` should explicitly call into the
+``AliasAnalysis::getAnalysisUsage`` method in addition to declaring
+any pass dependencies your pass has. Thus you should have something like this:
+
+.. code-block:: c++
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AliasAnalysis::getAnalysisUsage(AU);
+    // declare your dependencies here.
+  }
+
+Additionally, you must invoke the ``InitializeAliasAnalysis`` method from your
+analysis run method (``run`` for a ``Pass``, ``runOnFunction`` for a
+``FunctionPass``, or ``InitializePass`` for an ``ImmutablePass``). For example
+(as part of a ``Pass``):
+
+.. code-block:: c++
+
+  bool run(Module &M) {
+    InitializeAliasAnalysis(this);
+    // Perform analysis here...
+    return false;
+  }
+
+Interfaces which may be specified
+---------------------------------
+
+All of the `AliasAnalysis
+<http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html>`__ virtual methods
+default to providing `chaining`_ to another alias analysis implementation, which
+ends up returning conservatively correct information (returning "May" Alias and
+"Mod/Ref" for alias and mod/ref queries respectively). Depending on the
+capabilities of the analysis you are implementing, you just override the
+interfaces you can improve.
+
+.. _chaining:
+.. 
_chain: + +``AliasAnalysis`` chaining behavior +----------------------------------- + +With only one special exception (the `no-aa`_ pass) every alias analysis pass +chains to another alias analysis implementation (for example, the user can +specify "``-basicaa -ds-aa -licm``" to get the maximum benefit from both alias +analyses). The alias analysis class automatically takes care of most of this +for methods that you don't override. For methods that you do override, in code +paths that return a conservative MayAlias or Mod/Ref result, simply return +whatever the superclass computes. For example: + +.. code-block:: c++ + + AliasAnalysis::AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + if (...) + return NoAlias; + ... + + // Couldn't determine a must or no-alias result. + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); + } + +In addition to analysis queries, you must make sure to unconditionally pass LLVM +`update notification`_ methods to the superclass as well if you override them, +which allows all alias analyses in a change to be updated. + +.. _update notification: + +Updating analysis results for transformations +--------------------------------------------- + +Alias analysis information is initially computed for a static snapshot of the +program, but clients will use this information to make transformations to the +code. All but the most trivial forms of alias analysis will need to have their +analysis results updated to reflect the changes made by these transformations. + +The ``AliasAnalysis`` interface exposes four methods which are used to +communicate program changes from the clients to the analysis implementations. +Various alias analysis implementations should use these methods to ensure that +their internal data structures are kept up-to-date as the program changes (for +example, when an instruction is deleted), and clients of alias analysis must be +sure to call these interfaces appropriately. 
+ +The ``deleteValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``deleteValue`` method is called by transformations when they remove an +instruction or any other value from the program (including values that do not +use pointers). Typically alias analyses keep data structures that have entries +for each value in the program. When this method is called, they should remove +any entries for the specified value, if they exist. + +The ``copyValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``copyValue`` method is used when a new value is introduced into the +program. There is no way to introduce a value into the program that did not +exist before (this doesn't make sense for a safe compiler transformation), so +this is the only way to introduce a new value. This method indicates that the +new value has exactly the same properties as the value being copied. + +The ``replaceWithNewValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This method is a simple helper method that is provided to make clients easier to +use. It is implemented by copying the old analysis information to the new +value, then deleting the old value. This method cannot be overridden by alias +analysis implementations. + +The ``addEscapingUse`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``addEscapingUse`` method is used when the uses of a pointer value have +changed in ways that may invalidate precomputed analysis information. +Implementations may either use this callback to provide conservative responses +for points whose uses have change since analysis time, or may recompute some or +all of their internal state to continue providing accurate responses. 
+ +In general, any new use of a pointer value is considered an escaping use, and +must be reported through this callback, *except* for the uses below: + +* A ``bitcast`` or ``getelementptr`` of the pointer +* A ``store`` through the pointer (but not a ``store`` *of* the pointer) +* A ``load`` through the pointer + +Efficiency Issues +----------------- + +From the LLVM perspective, the only thing you need to do to provide an efficient +alias analysis is to make sure that alias analysis **queries** are serviced +quickly. The actual calculation of the alias analysis results (the "run" +method) is only performed once, but many (perhaps duplicate) queries may be +performed. Because of this, try to move as much computation to the run method +as possible (within reason). + +Limitations +----------- + +The AliasAnalysis infrastructure has several limitations which make writing a +new ``AliasAnalysis`` implementation difficult. + +There is no way to override the default alias analysis. It would be very useful +to be able to do something like "``opt -my-aa -O2``" and have it use ``-my-aa`` +for all passes which need AliasAnalysis, but there is currently no support for +that, short of changing the source code and recompiling. Similarly, there is +also no way of setting a chain of analyses as the default. + +There is no way for transform passes to declare that they preserve +``AliasAnalysis`` implementations. The ``AliasAnalysis`` interface includes +``deleteValue`` and ``copyValue`` methods which are intended to allow a pass to +keep an AliasAnalysis consistent, however there's no way for a pass to declare +in its ``getAnalysisUsage`` that it does so. Some passes attempt to use +``AU.addPreserved<AliasAnalysis>``, however this doesn't actually have any +effect. + +``AliasAnalysisCounter`` (``-count-aa``) and ``AliasDebugger`` (``-debug-aa``) +are implemented as ``ModulePass`` classes, so if your alias analysis uses +``FunctionPass``, it won't be able to use these utilities. 
If you try to use +them, the pass manager will silently route alias analysis queries directly to +``BasicAliasAnalysis`` instead. + +Similarly, the ``opt -p`` option introduces ``ModulePass`` passes between each +pass, which prevents the use of ``FunctionPass`` alias analysis passes. + +The ``AliasAnalysis`` API does have functions for notifying implementations when +values are deleted or copied, however these aren't sufficient. There are many +other ways that LLVM IR can be modified which could be relevant to +``AliasAnalysis`` implementations which can not be expressed. + +The ``AliasAnalysisDebugger`` utility seems to suggest that ``AliasAnalysis`` +implementations can expect that they will be informed of any relevant ``Value`` +before it appears in an alias query. However, popular clients such as ``GVN`` +don't support this, and are known to trigger errors when run with the +``AliasAnalysisDebugger``. + +Due to several of the above limitations, the most obvious use for the +``AliasAnalysisCounter`` utility, collecting stats on all alias queries in a +compilation, doesn't work, even if the ``AliasAnalysis`` implementations don't +use ``FunctionPass``. There's no way to set a default, much less a default +sequence, and there's no way to preserve it. + +The ``AliasSetTracker`` class (which is used by ``LICM``) makes a +non-deterministic number of alias queries. This can cause stats collected by +``AliasAnalysisCounter`` to have fluctuations among identical runs, for +example. Another consequence is that debugging techniques involving pausing +execution after a predetermined number of queries can be unreliable. + +Many alias queries can be reformulated in terms of other alias queries. When +multiple ``AliasAnalysis`` queries are chained together, it would make sense to +start those queries from the beginning of the chain, with care taken to avoid +infinite looping, however currently an implementation which wants to do this can +only start such queries from itself. 
+ +Using alias analysis results +============================ + +There are several different ways to use alias analysis results. In order of +preference, these are: + +Using the ``MemoryDependenceAnalysis`` Pass +------------------------------------------- + +The ``memdep`` pass uses alias analysis to provide high-level dependence +information about memory-using instructions. This will tell you which store +feeds into a load, for example. It uses caching and other techniques to be +efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations. + +.. _AliasSetTracker: + +Using the ``AliasSetTracker`` class +----------------------------------- + +Many transformations need information about alias **sets** that are active in +some scope, rather than information about pairwise aliasing. The +`AliasSetTracker <http://llvm.org/doxygen/classllvm_1_1AliasSetTracker.html>`__ +class is used to efficiently build these Alias Sets from the pairwise alias +analysis information provided by the ``AliasAnalysis`` interface. + +First you initialize the AliasSetTracker by using the "``add``" methods to add +information about various potentially aliasing instructions in the scope you are +interested in. Once all of the alias sets are completed, your pass should +simply iterate through the constructed alias sets, using the ``AliasSetTracker`` +``begin()``/``end()`` methods. + +The ``AliasSet``\s formed by the ``AliasSetTracker`` are guaranteed to be +disjoint, calculate mod/ref information and volatility for the set, and keep +track of whether or not all of the pointers in the set are Must aliases. The +AliasSetTracker also makes sure that sets are properly folded due to call +instructions, and can provide a list of pointers in each set. + +As an example user of this, the `Loop Invariant Code Motion +<doxygen/structLICM.html>`_ pass uses ``AliasSetTracker``\s to calculate alias +sets for each loop nest. 
If an ``AliasSet`` in a loop is not modified, then all +load instructions from that set may be hoisted out of the loop. If any alias +sets are stored to **and** are must alias sets, then the stores may be sunk +to outside of the loop, promoting the memory location to a register for the +duration of the loop nest. Both of these transformations only apply if the +pointer argument is loop-invariant. + +The AliasSetTracker implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The AliasSetTracker class is implemented to be as efficient as possible. It +uses the union-find algorithm to efficiently merge AliasSets when a pointer is +inserted into the AliasSetTracker that aliases multiple sets. The primary data +structure is a hash table mapping pointers to the AliasSet they are in. + +The AliasSetTracker class must maintain a list of all of the LLVM ``Value*``\s +that are in each AliasSet. Since the hash table already has entries for each +LLVM ``Value*`` of interest, the AliasesSets thread the linked list through +these hash-table nodes to avoid having to allocate memory unnecessarily, and to +make merging alias sets extremely efficient (the linked list merge is constant +time). + +You shouldn't need to understand these details if you are just a client of the +AliasSetTracker, but if you look at the code, hopefully this brief description +will help make sense of why things are designed the way they are. + +Using the ``AliasAnalysis`` interface directly +---------------------------------------------- + +If neither of these utility class are what your pass needs, you should use the +interfaces exposed by the ``AliasAnalysis`` class directly. Try to use the +higher-level methods when possible (e.g., use mod/ref information instead of the +`alias`_ method directly if possible) to get the best precision and efficiency. 
+
+Existing alias analysis implementations and clients
+===================================================
+
+If you're going to be working with the LLVM alias analysis infrastructure, you
+should know what clients and implementations of alias analysis are available.
+In particular, if you are implementing an alias analysis, you should be aware of
+`the clients`_ that are useful for monitoring and evaluating different
+implementations.
+
+.. _various alias analysis implementations:
+
+Available ``AliasAnalysis`` implementations
+-------------------------------------------
+
+This section lists the various implementations of the ``AliasAnalysis``
+interface. With the exception of the `-no-aa`_ implementation, all of these
+`chain`_ to other alias analysis implementations.
+
+.. _no-aa:
+.. _-no-aa:
+
+The ``-no-aa`` pass
+^^^^^^^^^^^^^^^^^^^
+
+The ``-no-aa`` pass is just like what it sounds: an alias analysis that never
+returns any useful information. This pass can be useful if you think that alias
+analysis is doing something wrong and are trying to narrow down a problem.
+
+The ``-basicaa`` pass
+^^^^^^^^^^^^^^^^^^^^^
+
+The ``-basicaa`` pass is an aggressive local analysis that *knows* many
+important facts:
+
+* Distinct globals, stack allocations, and heap allocations can never alias.
+* Globals, stack allocations, and heap allocations never alias the null pointer.
+* Different fields of a structure do not alias.
+* Indexes into arrays with statically differing subscripts cannot alias.
+* Many common standard C library functions `never access memory or only read
+  memory`_.
+* Pointers that obviously point to constant globals "``pointsToConstantMemory``".
+* Function calls cannot modify or reference stack allocations if they never
+  escape from the function that allocates them (a common case for automatic
+  arrays). 
+ +The ``-globalsmodref-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This pass implements a simple context-sensitive mod/ref and alias analysis for +internal global variables that don't "have their address taken". If a global +does not have its address taken, the pass knows that no pointers alias the +global. This pass also keeps track of functions that it knows never access +memory or never read memory. This allows certain optimizations (e.g. GVN) to +eliminate call instructions entirely. + +The real power of this pass is that it provides context-sensitive mod/ref +information for call instructions. This allows the optimizer to know that calls +to a function do not clobber or read the value of the global, allowing loads and +stores to be eliminated. + +.. note:: + + This pass is somewhat limited in its scope (only support non-address taken + globals), but is very quick analysis. + +The ``-steens-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-steens-aa`` pass implements a variation on the well-known "Steensgaard's +algorithm" for interprocedural alias analysis. Steensgaard's algorithm is a +unification-based, flow-insensitive, context-insensitive, and field-insensitive +alias analysis that is also very scalable (effectively linear time). + +The LLVM ``-steens-aa`` pass implements a "speculatively field-**sensitive**" +version of Steensgaard's algorithm using the Data Structure Analysis framework. +This gives it substantially more precision than the standard algorithm while +maintaining excellent analysis scalability. + +.. note:: + + ``-steens-aa`` is available in the optional "poolalloc" module. It is not part + of the LLVM core. + +The ``-ds-aa`` pass +^^^^^^^^^^^^^^^^^^^ + +The ``-ds-aa`` pass implements the full Data Structure Analysis algorithm. Data +Structure Analysis is a modular unification-based, flow-insensitive, +context-**sensitive**, and speculatively field-**sensitive** alias +analysis that is also quite scalable, usually at ``O(n * log(n))``. 
+
+This algorithm is capable of responding to a full variety of alias analysis
+queries, and can provide context-sensitive mod/ref information as well. The
+only major facility not implemented so far is support for must-alias
+information.
+
+.. note::
+
+  ``-ds-aa`` is available in the optional "poolalloc" module. It is not part of
+  the LLVM core.
+
+The ``-scev-aa`` pass
+^^^^^^^^^^^^^^^^^^^^^
+
+The ``-scev-aa`` pass implements AliasAnalysis queries by translating them into
+ScalarEvolution queries. This gives it a more complete understanding of
+``getelementptr`` instructions and loop induction variables than other alias
+analyses have.
+
+Alias analysis driven transformations
+-------------------------------------
+
+LLVM includes several alias-analysis driven transformations which can be used
+with any of the implementations above.
+
+The ``-adce`` pass
+^^^^^^^^^^^^^^^^^^
+
+The ``-adce`` pass, which implements Aggressive Dead Code Elimination, uses the
+``AliasAnalysis`` interface to delete calls to functions that do not have
+side-effects and are not used.
+
+The ``-licm`` pass
+^^^^^^^^^^^^^^^^^^
+
+The ``-licm`` pass implements various Loop Invariant Code Motion related
+transformations. It uses the ``AliasAnalysis`` interface for several different
+transformations:
+
+* It uses mod/ref information to hoist or sink load instructions out of loops if
+  there are no instructions in the loop that modify the memory loaded.
+
+* It uses mod/ref information to hoist function calls out of loops that do not
+  write to memory and are loop-invariant.
+
+* It uses alias information to promote memory objects that are loaded and stored
+  to in loops to live in a register instead. It can do this if there are no may
+  aliases to the loaded/stored memory location.
+
+The ``-argpromotion`` pass
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``-argpromotion`` pass promotes by-reference arguments to be passed in
+by-value instead. 
In particular, if pointer arguments are only loaded from it +passes in the value loaded instead of the address to the function. This pass +uses alias information to make sure that the value loaded from the argument +pointer is not modified between the entry of the function and any load of the +pointer. + +The ``-gvn``, ``-memcpyopt``, and ``-dse`` passes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These passes use AliasAnalysis information to reason about loads and stores. + +.. _the clients: + +Clients for debugging and evaluation of implementations +------------------------------------------------------- + +These passes are useful for evaluating the various alias analysis +implementations. You can use them with commands like: + +.. code-block:: bash + + % opt -ds-aa -aa-eval foo.bc -disable-output -stats + +The ``-print-alias-sets`` pass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-print-alias-sets`` pass is exposed as part of the ``opt`` tool to print +out the Alias Sets formed by the `AliasSetTracker`_ class. This is useful if +you're using the ``AliasSetTracker`` class. To use it, use something like: + +.. code-block:: bash + + % opt -ds-aa -print-alias-sets -disable-output + +The ``-count-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^ + +The ``-count-aa`` pass is useful to see how many queries a particular pass is +making and what responses are returned by the alias analysis. As an example: + +.. code-block:: bash + + % opt -basicaa -count-aa -ds-aa -count-aa -licm + +will print out how many queries (and what responses are returned) by the +``-licm`` pass (of the ``-ds-aa`` pass) and how many queries are made of the +``-basicaa`` pass by the ``-ds-aa`` pass. This can be useful when debugging a +transformation or an alias analysis implementation. + +The ``-aa-eval`` pass +^^^^^^^^^^^^^^^^^^^^^ + +The ``-aa-eval`` pass simply iterates through all pairs of pointers in a +function and asks an alias analysis whether or not the pointers alias. 
This +gives an indication of the precision of the alias analysis. Statistics are +printed indicating the percent of no/may/must aliases found (a more precise +algorithm will have a lower number of may aliases). + +Memory Dependence Analysis +========================== + +If you're just looking to be a client of alias analysis information, consider +using the Memory Dependence Analysis interface instead. MemDep is a lazy, +caching layer on top of alias analysis that is able to answer the question of +what preceding memory operations a given instruction depends on, either at an +intra- or inter-block level. Because of its laziness and caching policy, using +MemDep can be a significant performance win over accessing alias analysis +directly. diff --git a/docs/Atomics.html b/docs/Atomics.html deleted file mode 100644 index 2358f4d2ef..0000000000 --- a/docs/Atomics.html +++ /dev/null @@ -1,569 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <title>LLVM Atomic Instructions and Concurrency Guide</title> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - LLVM Atomic Instructions and Concurrency Guide -</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - <li><a href="#outsideatomic">Optimization outside atomic</a></li> - <li><a href="#atomicinst">Atomic instructions</a></li> - <li><a href="#ordering">Atomic orderings</a></li> - <li><a href="#iropt">Atomics and IR optimization</a></li> - <li><a href="#codegen">Atomics and Codegen</a></li> -</ol> - -<div class="doc_author"> - <p>Written by Eli Friedman</p> -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="introduction">Introduction</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Historically, LLVM has not had very 
strong support for concurrency; some -minimal intrinsics were provided, and <code>volatile</code> was used in some -cases to achieve rough semantics in the presence of concurrency. However, this -is changing; there are now new instructions which are well-defined in the -presence of threads and asynchronous signals, and the model for existing -instructions has been clarified in the IR.</p> - -<p>The atomic instructions are designed specifically to provide readable IR and - optimized code generation for the following:</p> -<ul> - <li>The new C++0x <code><atomic></code> header. - (<a href="http://www.open-std.org/jtc1/sc22/wg21/">C++0x draft available here</a>.) - (<a href="http://www.open-std.org/jtc1/sc22/wg14/">C1x draft available here</a>)</li> - <li>Proper semantics for Java-style memory, for both <code>volatile</code> and - regular shared variables. - (<a href="http://java.sun.com/docs/books/jls/third_edition/html/memory.html">Java Specification</a>)</li> - <li>gcc-compatible <code>__sync_*</code> builtins. - (<a href="http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html">Description</a>)</li> - <li>Other scenarios with atomic semantics, including <code>static</code> - variables with non-trivial constructors in C++.</li> -</ul> - -<p>Atomic and volatile in the IR are orthogonal; "volatile" is the C/C++ - volatile, which ensures that every volatile load and store happens and is - performed in the stated order. A couple examples: if a - SequentiallyConsistent store is immediately followed by another - SequentiallyConsistent store to the same address, the first store can - be erased. This transformation is not allowed for a pair of volatile - stores. 
On the other hand, a non-volatile non-atomic load can be moved - across a volatile load freely, but not an Acquire load.</p> - -<p>This document is intended to provide a guide to anyone either writing a - frontend for LLVM or working on optimization passes for LLVM with a guide - for how to deal with instructions with special semantics in the presence of - concurrency. This is not intended to be a precise guide to the semantics; - the details can get extremely complicated and unreadable, and are not - usually necessary.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="outsideatomic">Optimization outside atomic</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The basic <code>'load'</code> and <code>'store'</code> allow a variety of - optimizations, but can lead to undefined results in a concurrent environment; - see <a href="#o_nonatomic">NonAtomic</a>. This section specifically goes - into the one optimizer restriction which applies in concurrent environments, - which gets a bit more of an extended description because any optimization - dealing with stores needs to be aware of it.</p> - -<p>From the optimizer's point of view, the rule is that if there - are not any instructions with atomic ordering involved, concurrency does - not matter, with one exception: if a variable might be visible to another - thread or signal handler, a store cannot be inserted along a path where it - might not execute otherwise. 
Take the following example:</p> - -<pre> -/* C code, for readability; run through clang -O2 -S -emit-llvm to get - equivalent IR */ -int x; -void f(int* a) { - for (int i = 0; i < 100; i++) { - if (a[i]) - x += 1; - } -} -</pre> - -<p>The following is equivalent in non-concurrent situations:</p> - -<pre> -int x; -void f(int* a) { - int xtemp = x; - for (int i = 0; i < 100; i++) { - if (a[i]) - xtemp += 1; - } - x = xtemp; -} -</pre> - -<p>However, LLVM is not allowed to transform the former to the latter: it could - indirectly introduce undefined behavior if another thread can access x at - the same time. (This example is particularly of interest because before the - concurrency model was implemented, LLVM would perform this - transformation.)</p> - -<p>Note that speculative loads are allowed; a load which - is part of a race returns <code>undef</code>, but does not have undefined - behavior.</p> - - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="atomicinst">Atomic instructions</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>For cases where simple loads and stores are not sufficient, LLVM provides - various atomic instructions. The exact guarantees provided depend on the - ordering; see <a href="#ordering">Atomic orderings</a></p> - -<p><code>load atomic</code> and <code>store atomic</code> provide the same - basic functionality as non-atomic loads and stores, but provide additional - guarantees in situations where threads and signals are involved.</p> - -<p><code>cmpxchg</code> and <code>atomicrmw</code> are essentially like an - atomic load followed by an atomic store (where the store is conditional for - <code>cmpxchg</code>), but no other memory operation can happen on any thread - between the load and store. 
Note that LLVM's cmpxchg does not provide quite - as many options as the C++0x version.</p> - -<p>A <code>fence</code> provides Acquire and/or Release ordering which is not - part of another operation; it is normally used along with Monotonic memory - operations. A Monotonic load followed by an Acquire fence is roughly - equivalent to an Acquire load.</p> - -<p>Frontends generating atomic instructions generally need to be aware of the - target to some degree; atomic instructions are guaranteed to be lock-free, - and therefore an instruction which is wider than the target natively supports - can be impossible to generate.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="ordering">Atomic orderings</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>In order to achieve a balance between performance and necessary guarantees, - there are six levels of atomicity. They are listed in order of strength; - each level includes all the guarantees of the previous level except for - Acquire/Release. (See also <a href="LangRef.html#ordering">LangRef</a>.)</p> - -<!-- ======================================================================= --> -<h3> - <a name="o_notatomic">NotAtomic</a> -</h3> - -<div> - -<p>NotAtomic is the obvious, a load or store which is not atomic. (This isn't - really a level of atomicity, but is listed here for comparison.) This is - essentially a regular load or store. If there is a race on a given memory - location, loads from that location return undef.</p> - -<dl> - <dt>Relevant standard</dt> - <dd>This is intended to match shared variables in C/C++, and to be used - in any other context where memory access is necessary, and - a race is impossible. (The precise definition is in - <a href="LangRef.html#memmodel">LangRef</a>.) 
- <dt>Notes for frontends</dt> - <dd>The rule is essentially that all memory accessed with basic loads and - stores by multiple threads should be protected by a lock or other - synchronization; otherwise, you are likely to run into undefined - behavior. If your frontend is for a "safe" language like Java, - use Unordered to load and store any shared variable. Note that NotAtomic - volatile loads and stores are not properly atomic; do not try to use - them as a substitute. (Per the C/C++ standards, volatile does provide - some limited guarantees around asynchronous signals, but atomics are - generally a better solution.) - <dt>Notes for optimizers</dt> - <dd>Introducing loads to shared variables along a codepath where they would - not otherwise exist is allowed; introducing stores to shared variables - is not. See <a href="#outsideatomic">Optimization outside - atomic</a>.</dd> - <dt>Notes for code generation</dt> - <dd>The one interesting restriction here is that it is not allowed to write - to bytes outside of the bytes relevant to a store. This is mostly - relevant to unaligned stores: it is not allowed in general to convert - an unaligned store into two aligned stores of the same width as the - unaligned store. Backends are also expected to generate an i8 store - as an i8 store, and not an instruction which writes to surrounding - bytes. (If you are writing a backend for an architecture which cannot - satisfy these restrictions and cares about concurrency, please send an - email to llvmdev.)</dd> -</dl> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="o_unordered">Unordered</a> -</h3> - -<div> - -<p>Unordered is the lowest level of atomicity. It essentially guarantees that - races produce somewhat sane results instead of having undefined behavior. 
- It also guarantees the operation to be lock-free, so it do not depend on - the data being part of a special atomic structure or depend on a separate - per-process global lock. Note that code generation will fail for - unsupported atomic operations; if you need such an operation, use explicit - locking.</p> - -<dl> - <dt>Relevant standard</dt> - <dd>This is intended to match the Java memory model for shared - variables.</dd> - <dt>Notes for frontends</dt> - <dd>This cannot be used for synchronization, but is useful for Java and - other "safe" languages which need to guarantee that the generated - code never exhibits undefined behavior. Note that this guarantee - is cheap on common platforms for loads of a native width, but can - be expensive or unavailable for wider loads, like a 64-bit store - on ARM. (A frontend for Java or other "safe" languages would normally - split a 64-bit store on ARM into two 32-bit unordered stores.) - <dt>Notes for optimizers</dt> - <dd>In terms of the optimizer, this prohibits any transformation that - transforms a single load into multiple loads, transforms a store - into multiple stores, narrows a store, or stores a value which - would not be stored otherwise. Some examples of unsafe optimizations - are narrowing an assignment into a bitfield, rematerializing - a load, and turning loads and stores into a memcpy call. Reordering - unordered operations is safe, though, and optimizers should take - advantage of that because unordered operations are common in - languages that need them.</dd> - <dt>Notes for code generation</dt> - <dd>These operations are required to be atomic in the sense that if you - use unordered loads and unordered stores, a load cannot see a value - which was never stored. 
A normal load or store instruction is usually - sufficient, but note that an unordered load or store cannot - be split into multiple instructions (or an instruction which - does multiple memory operations, like <code>LDRD</code> on ARM).</dd> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="o_monotonic">Monotonic</a> -</h3> - -<div> - -<p>Monotonic is the weakest level of atomicity that can be used in - synchronization primitives, although it does not provide any general - synchronization. It essentially guarantees that if you take all the - operations affecting a specific address, a consistent ordering exists. - -<dl> - <dt>Relevant standard</dt> - <dd>This corresponds to the C++0x/C1x <code>memory_order_relaxed</code>; - see those standards for the exact definition. - <dt>Notes for frontends</dt> - <dd>If you are writing a frontend which uses this directly, use with caution. - The guarantees in terms of synchronization are very weak, so make - sure these are only used in a pattern which you know is correct. - Generally, these would either be used for atomic operations which - do not protect other memory (like an atomic counter), or along with - a <code>fence</code>.</dd> - <dt>Notes for optimizers</dt> - <dd>In terms of the optimizer, this can be treated as a read+write on the - relevant memory location (and alias analysis will take advantage of - that). In addition, it is legal to reorder non-atomic and Unordered - loads around Monotonic loads. CSE/DSE and a few other optimizations - are allowed, but Monotonic operations are unlikely to be used in ways - which would make those optimizations useful.</dd> - <dt>Notes for code generation</dt> - <dd>Code generation is essentially the same as that for unordered for loads - and stores. No fences are required. 
<code>cmpxchg</code> and - <code>atomicrmw</code> are required to appear as a single operation.</dd> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="o_acquire">Acquire</a> -</h3> - -<div> - -<p>Acquire provides a barrier of the sort necessary to acquire a lock to access - other memory with normal loads and stores. - -<dl> - <dt>Relevant standard</dt> - <dd>This corresponds to the C++0x/C1x <code>memory_order_acquire</code>. It - should also be used for C++0x/C1x <code>memory_order_consume</code>. - <dt>Notes for frontends</dt> - <dd>If you are writing a frontend which uses this directly, use with caution. - Acquire only provides a semantic guarantee when paired with a Release - operation.</dd> - <dt>Notes for optimizers</dt> - <dd>Optimizers not aware of atomics can treat this like a nothrow call. - It is also possible to move stores from before an Acquire load - or read-modify-write operation to after it, and move non-Acquire - loads from before an Acquire operation to after it.</dd> - <dt>Notes for code generation</dt> - <dd>Architectures with weak memory ordering (essentially everything relevant - today except x86 and SPARC) require some sort of fence to maintain - the Acquire semantics. The precise fences required varies widely by - architecture, but for a simple implementation, most architectures provide - a barrier which is strong enough for everything (<code>dmb</code> on ARM, - <code>sync</code> on PowerPC, etc.). Putting such a fence after the - equivalent Monotonic operation is sufficient to maintain Acquire - semantics for a memory operation.</dd> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="o_acquire">Release</a> -</h3> - -<div> - -<p>Release is similar to Acquire, but with a barrier of the sort necessary to - release a lock. 
- -<dl> - <dt>Relevant standard</dt> - <dd>This corresponds to the C++0x/C1x <code>memory_order_release</code>.</dd> - <dt>Notes for frontends</dt> - <dd>If you are writing a frontend which uses this directly, use with caution. - Release only provides a semantic guarantee when paired with a Acquire - operation.</dd> - <dt>Notes for optimizers</dt> - <dd>Optimizers not aware of atomics can treat this like a nothrow call. - It is also possible to move loads from after a Release store - or read-modify-write operation to before it, and move non-Release - stores from after an Release operation to before it.</dd> - <dt>Notes for code generation</dt> - <dd>See the section on Acquire; a fence before the relevant operation is - usually sufficient for Release. Note that a store-store fence is not - sufficient to implement Release semantics; store-store fences are - generally not exposed to IR because they are extremely difficult to - use correctly.</dd> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="o_acqrel">AcquireRelease</a> -</h3> - -<div> - -<p>AcquireRelease (<code>acq_rel</code> in IR) provides both an Acquire and a - Release barrier (for fences and operations which both read and write memory). - -<dl> - <dt>Relevant standard</dt> - <dd>This corresponds to the C++0x/C1x <code>memory_order_acq_rel</code>. - <dt>Notes for frontends</dt> - <dd>If you are writing a frontend which uses this directly, use with caution. 
- Acquire only provides a semantic guarantee when paired with a Release - operation, and vice versa.</dd> - <dt>Notes for optimizers</dt> - <dd>In general, optimizers should treat this like a nothrow call; the - the possible optimizations are usually not interesting.</dd> - <dt>Notes for code generation</dt> - <dd>This operation has Acquire and Release semantics; see the sections on - Acquire and Release.</dd> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="o_seqcst">SequentiallyConsistent</a> -</h3> - -<div> - -<p>SequentiallyConsistent (<code>seq_cst</code> in IR) provides - Acquire semantics for loads and Release semantics for - stores. Additionally, it guarantees that a total ordering exists - between all SequentiallyConsistent operations. - -<dl> - <dt>Relevant standard</dt> - <dd>This corresponds to the C++0x/C1x <code>memory_order_seq_cst</code>, - Java volatile, and the gcc-compatible <code>__sync_*</code> builtins - which do not specify otherwise. - <dt>Notes for frontends</dt> - <dd>If a frontend is exposing atomic operations, these are much easier to - reason about for the programmer than other kinds of operations, and using - them is generally a practical performance tradeoff.</dd> - <dt>Notes for optimizers</dt> - <dd>Optimizers not aware of atomics can treat this like a nothrow call. - For SequentiallyConsistent loads and stores, the same reorderings are - allowed as for Acquire loads and Release stores, except that - SequentiallyConsistent operations may not be reordered.</dd> - <dt>Notes for code generation</dt> - <dd>SequentiallyConsistent loads minimally require the same barriers - as Acquire operations and SequentiallyConsistent stores require - Release barriers. Additionally, the code generator must enforce - ordering between SequentiallyConsistent stores followed by - SequentiallyConsistent loads. 
This is usually done by emitting - either a full fence before the loads or a full fence after the - stores; which is preferred varies by architecture.</dd> -</dl> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="iropt">Atomics and IR optimization</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Predicates for optimizer writers to query: -<ul> - <li>isSimple(): A load or store which is not volatile or atomic. This is - what, for example, memcpyopt would check for operations it might - transform.</li> - <li>isUnordered(): A load or store which is not volatile and at most - Unordered. This would be checked, for example, by LICM before hoisting - an operation.</li> - <li>mayReadFromMemory()/mayWriteToMemory(): Existing predicate, but note - that they return true for any operation which is volatile or at least - Monotonic.</li> - <li>Alias analysis: Note that AA will return ModRef for anything Acquire or - Release, and for the address accessed by any Monotonic operation.</li> -</ul> - -<p>To support optimizing around atomic operations, make sure you are using - the right predicates; everything should work if that is done. If your - pass should optimize some atomic operations (Unordered operations in - particular), make sure it doesn't replace an atomic load or store with - a non-atomic operation.</p> - -<p>Some examples of how optimizations interact with various kinds of atomic - operations: -<ul> - <li>memcpyopt: An atomic operation cannot be optimized into part of a - memcpy/memset, including unordered loads/stores. It can pull operations - across some atomic operations. - <li>LICM: Unordered loads/stores can be moved out of a loop. It just treats - monotonic operations like a read+write to a memory location, and anything - stricter than that like a nothrow call. - <li>DSE: Unordered stores can be DSE'ed like normal stores. 
Monotonic stores - can be DSE'ed in some cases, but it's tricky to reason about, and not - especially important. - <li>Folding a load: Any atomic load from a constant global can be - constant-folded, because it cannot be observed. Similar reasoning allows - scalarrepl with atomic loads and stores. -</ul> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="codegen">Atomics and Codegen</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Atomic operations are represented in the SelectionDAG with - <code>ATOMIC_*</code> opcodes. On architectures which use barrier - instructions for all atomic ordering (like ARM), appropriate fences are - split out as the DAG is built.</p> - -<p>The MachineMemOperand for all atomic operations is currently marked as - volatile; this is not correct in the IR sense of volatile, but CodeGen - handles anything marked volatile very conservatively. This should get - fixed at some point.</p> - -<p>Common architectures have some way of representing at least a pointer-sized - lock-free <code>cmpxchg</code>; such an operation can be used to implement - all the other atomic operations which can be represented in IR up to that - size. Backends are expected to implement all those operations, but not - operations which cannot be implemented in a lock-free manner. It is - expected that backends will give an error when given an operation which - cannot be implemented. (The LLVM code generator is not very helpful here - at the moment, but hopefully that will change.)</p> - -<p>The implementation of atomics on LL/SC architectures (like ARM) is currently - a bit of a mess; there is a lot of copy-pasted code across targets, and - the representation is relatively unsuited to optimization (it would be nice - to be able to optimize loops involving cmpxchg etc.).</p> - -<p>On x86, all atomic loads generate a <code>MOV</code>. 
- SequentiallyConsistent stores generate an <code>XCHG</code>, other stores - generate a <code>MOV</code>. SequentiallyConsistent fences generate an - <code>MFENCE</code>, other fences do not cause any code to be generated. - cmpxchg uses the <code>LOCK CMPXCHG</code> instruction. - <code>atomicrmw xchg</code> uses <code>XCHG</code>, - <code>atomicrmw add</code> and <code>atomicrmw sub</code> use - <code>XADD</code>, and all other <code>atomicrmw</code> operations generate - a loop with <code>LOCK CMPXCHG</code>. Depending on the users of the - result, some <code>atomicrmw</code> operations can be translated into - operations like <code>LOCK AND</code>, but that does not work in - general.</p> - -<p>On ARM, MIPS, and many other RISC architectures, Acquire, Release, and - SequentiallyConsistent semantics require barrier instructions - for every such operation. Loads and stores generate normal instructions. - <code>cmpxchg</code> and <code>atomicrmw</code> can be represented using - a loop with LL/SC-style instructions which take some sort of exclusive - lock on a cache line (<code>LDREX</code> and <code>STREX</code> on - ARM, etc.). At the moment, the IR does not provide any way to represent a - weak <code>cmpxchg</code> which would not require a loop.</p> -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date: 2011-08-09 02:07:00 -0700 (Tue, 09 Aug 2011) $ -</address> - -</body> -</html> diff --git a/docs/Atomics.rst b/docs/Atomics.rst new file mode 100644 index 0000000000..db27959073 --- /dev/null +++ b/docs/Atomics.rst @@ -0,0 +1,441 @@ +.. 
_atomics: + +============================================== +LLVM Atomic Instructions and Concurrency Guide +============================================== + +.. contents:: + :local: + +Introduction +============ + +Historically, LLVM has not had very strong support for concurrency; some minimal +intrinsics were provided, and ``volatile`` was used in some cases to achieve +rough semantics in the presence of concurrency. However, this is changing; +there are now new instructions which are well-defined in the presence of threads +and asynchronous signals, and the model for existing instructions has been +clarified in the IR. + +The atomic instructions are designed specifically to provide readable IR and +optimized code generation for the following: + +* The new C++0x ``<atomic>`` header. (`C++0x draft available here + <http://www.open-std.org/jtc1/sc22/wg21/>`_.) (`C1x draft available here + <http://www.open-std.org/jtc1/sc22/wg14/>`_.) + +* Proper semantics for Java-style memory, for both ``volatile`` and regular + shared variables. (`Java Specification + <http://java.sun.com/docs/books/jls/third_edition/html/memory.html>`_) + +* gcc-compatible ``__sync_*`` builtins. (`Description + <http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html>`_) + +* Other scenarios with atomic semantics, including ``static`` variables with + non-trivial constructors in C++. + +Atomic and volatile in the IR are orthogonal; "volatile" is the C/C++ volatile, +which ensures that every volatile load and store happens and is performed in the +stated order. A couple examples: if a SequentiallyConsistent store is +immediately followed by another SequentiallyConsistent store to the same +address, the first store can be erased. This transformation is not allowed for a +pair of volatile stores. On the other hand, a non-volatile non-atomic load can +be moved across a volatile load freely, but not an Acquire load. 
+ +This document is intended to provide a guide to anyone either writing a frontend +for LLVM or working on optimization passes for LLVM with a guide for how to deal +with instructions with special semantics in the presence of concurrency. This +is not intended to be a precise guide to the semantics; the details can get +extremely complicated and unreadable, and are not usually necessary. + +.. _Optimization outside atomic: + +Optimization outside atomic +=========================== + +The basic ``'load'`` and ``'store'`` allow a variety of optimizations, but can +lead to undefined results in a concurrent environment; see `NotAtomic`_. This +section specifically goes into the one optimizer restriction which applies in +concurrent environments, which gets a bit more of an extended description +because any optimization dealing with stores needs to be aware of it. + +From the optimizer's point of view, the rule is that if there are not any +instructions with atomic ordering involved, concurrency does not matter, with +one exception: if a variable might be visible to another thread or signal +handler, a store cannot be inserted along a path where it might not execute +otherwise. Take the following example: + +.. code-block:: c + + /* C code, for readability; run through clang -O2 -S -emit-llvm to get + equivalent IR */ + int x; + void f(int* a) { + for (int i = 0; i < 100; i++) { + if (a[i]) + x += 1; + } + } + +The following is equivalent in non-concurrent situations: + +.. code-block:: c + + int x; + void f(int* a) { + int xtemp = x; + for (int i = 0; i < 100; i++) { + if (a[i]) + xtemp += 1; + } + x = xtemp; + } + +However, LLVM is not allowed to transform the former to the latter: it could +indirectly introduce undefined behavior if another thread can access ``x`` at +the same time. (This example is particularly of interest because before the +concurrency model was implemented, LLVM would perform this transformation.) 
+ +Note that speculative loads are allowed; a load which is part of a race returns +``undef``, but does not have undefined behavior. + +Atomic instructions +=================== + +For cases where simple loads and stores are not sufficient, LLVM provides +various atomic instructions. The exact guarantees provided depend on the +ordering; see `Atomic orderings`_. + +``load atomic`` and ``store atomic`` provide the same basic functionality as +non-atomic loads and stores, but provide additional guarantees in situations +where threads and signals are involved. + +``cmpxchg`` and ``atomicrmw`` are essentially like an atomic load followed by an +atomic store (where the store is conditional for ``cmpxchg``), but no other +memory operation can happen on any thread between the load and store. Note that +LLVM's cmpxchg does not provide quite as many options as the C++0x version. + +A ``fence`` provides Acquire and/or Release ordering which is not part of +another operation; it is normally used along with Monotonic memory operations. +A Monotonic load followed by an Acquire fence is roughly equivalent to an +Acquire load. + +Frontends generating atomic instructions generally need to be aware of the +target to some degree; atomic instructions are guaranteed to be lock-free, and +therefore an instruction which is wider than the target natively supports can be +impossible to generate. + +.. _Atomic orderings: + +Atomic orderings +================ + +In order to achieve a balance between performance and necessary guarantees, +there are six levels of atomicity. They are listed in order of strength; each +level includes all the guarantees of the previous level except for +Acquire/Release. (See also `LangRef Ordering <LangRef.html#ordering>`_.) + +.. _NotAtomic: + +NotAtomic +--------- + +NotAtomic is the obvious, a load or store which is not atomic. (This isn't +really a level of atomicity, but is listed here for comparison.) This is +essentially a regular load or store. 
If there is a race on a given memory +location, loads from that location return undef. + +Relevant standard + This is intended to match shared variables in C/C++, and to be used in any + other context where memory access is necessary, and a race is impossible. (The + precise definition is in `LangRef Memory Model <LangRef.html#memmodel>`_.) + +Notes for frontends + The rule is essentially that all memory accessed with basic loads and stores + by multiple threads should be protected by a lock or other synchronization; + otherwise, you are likely to run into undefined behavior. If your frontend is + for a "safe" language like Java, use Unordered to load and store any shared + variable. Note that NotAtomic volatile loads and stores are not properly + atomic; do not try to use them as a substitute. (Per the C/C++ standards, + volatile does provide some limited guarantees around asynchronous signals, but + atomics are generally a better solution.) + +Notes for optimizers + Introducing loads to shared variables along a codepath where they would not + otherwise exist is allowed; introducing stores to shared variables is not. See + `Optimization outside atomic`_. + +Notes for code generation + The one interesting restriction here is that it is not allowed to write to + bytes outside of the bytes relevant to a store. This is mostly relevant to + unaligned stores: it is not allowed in general to convert an unaligned store + into two aligned stores of the same width as the unaligned store. Backends are + also expected to generate an i8 store as an i8 store, and not an instruction + which writes to surrounding bytes. (If you are writing a backend for an + architecture which cannot satisfy these restrictions and cares about + concurrency, please send an email to llvmdev.) + +Unordered +--------- + +Unordered is the lowest level of atomicity. It essentially guarantees that races +produce somewhat sane results instead of having undefined behavior. 
It also +guarantees the operation to be lock-free, so it does not depend on the data being +part of a special atomic structure or depend on a separate per-process global +lock. Note that code generation will fail for unsupported atomic operations; if +you need such an operation, use explicit locking. + +Relevant standard + This is intended to match the Java memory model for shared variables. + +Notes for frontends + This cannot be used for synchronization, but is useful for Java and other + "safe" languages which need to guarantee that the generated code never + exhibits undefined behavior. Note that this guarantee is cheap on common + platforms for loads of a native width, but can be expensive or unavailable for + wider loads, like a 64-bit store on ARM. (A frontend for Java or other "safe" + languages would normally split a 64-bit store on ARM into two 32-bit unordered + stores.) + +Notes for optimizers + In terms of the optimizer, this prohibits any transformation that transforms a + single load into multiple loads, transforms a store into multiple stores, + narrows a store, or stores a value which would not be stored otherwise. Some + examples of unsafe optimizations are narrowing an assignment into a bitfield, + rematerializing a load, and turning loads and stores into a memcpy + call. Reordering unordered operations is safe, though, and optimizers should + take advantage of that because unordered operations are common in languages + that need them. + +Notes for code generation + These operations are required to be atomic in the sense that if you use + unordered loads and unordered stores, a load cannot see a value which was + never stored. A normal load or store instruction is usually sufficient, but + note that an unordered load or store cannot be split into multiple + instructions (or an instruction which does multiple memory operations, like + ``LDRD`` on ARM). 
+ +Monotonic +--------- + +Monotonic is the weakest level of atomicity that can be used in synchronization +primitives, although it does not provide any general synchronization. It +essentially guarantees that if you take all the operations affecting a specific +address, a consistent ordering exists. + +Relevant standard + This corresponds to the C++0x/C1x ``memory_order_relaxed``; see those + standards for the exact definition. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. The + guarantees in terms of synchronization are very weak, so make sure these are + only used in a pattern which you know is correct. Generally, these would + either be used for atomic operations which do not protect other memory (like + an atomic counter), or along with a ``fence``. + +Notes for optimizers + In terms of the optimizer, this can be treated as a read+write on the relevant + memory location (and alias analysis will take advantage of that). In addition, + it is legal to reorder non-atomic and Unordered loads around Monotonic + loads. CSE/DSE and a few other optimizations are allowed, but Monotonic + operations are unlikely to be used in ways which would make those + optimizations useful. + +Notes for code generation + Code generation is essentially the same as that for unordered for loads and + stores. No fences are required. ``cmpxchg`` and ``atomicrmw`` are required + to appear as a single operation. + +Acquire +------- + +Acquire provides a barrier of the sort necessary to acquire a lock to access +other memory with normal loads and stores. + +Relevant standard + This corresponds to the C++0x/C1x ``memory_order_acquire``. It should also be + used for C++0x/C1x ``memory_order_consume``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Acquire only provides a semantic guarantee when paired with a Release + operation. 
+ +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. It is + also possible to move stores from before an Acquire load or read-modify-write + operation to after it, and move non-Acquire loads from before an Acquire + operation to after it. + +Notes for code generation + Architectures with weak memory ordering (essentially everything relevant today + except x86 and SPARC) require some sort of fence to maintain the Acquire + semantics. The precise fences required varies widely by architecture, but for + a simple implementation, most architectures provide a barrier which is strong + enough for everything (``dmb`` on ARM, ``sync`` on PowerPC, etc.). Putting + such a fence after the equivalent Monotonic operation is sufficient to + maintain Acquire semantics for a memory operation. + +Release +------- + +Release is similar to Acquire, but with a barrier of the sort necessary to +release a lock. + +Relevant standard + This corresponds to the C++0x/C1x ``memory_order_release``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Release only provides a semantic guarantee when paired with an Acquire + operation. + +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. It is + also possible to move loads from after a Release store or read-modify-write + operation to before it, and move non-Release stores from after a Release + operation to before it. + +Notes for code generation + See the section on Acquire; a fence before the relevant operation is usually + sufficient for Release. Note that a store-store fence is not sufficient to + implement Release semantics; store-store fences are generally not exposed to + IR because they are extremely difficult to use correctly. + +AcquireRelease +-------------- + +AcquireRelease (``acq_rel`` in IR) provides both an Acquire and a Release +barrier (for fences and operations which both read and write memory). 
+ +Relevant standard + This corresponds to the C++0x/C1x ``memory_order_acq_rel``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Acquire only provides a semantic guarantee when paired with a Release + operation, and vice versa. + +Notes for optimizers + In general, optimizers should treat this like a nothrow call; the possible + optimizations are usually not interesting. + +Notes for code generation + This operation has Acquire and Release semantics; see the sections on Acquire + and Release. + +SequentiallyConsistent +---------------------- + +SequentiallyConsistent (``seq_cst`` in IR) provides Acquire semantics for loads +and Release semantics for stores. Additionally, it guarantees that a total +ordering exists between all SequentiallyConsistent operations. + +Relevant standard + This corresponds to the C++0x/C1x ``memory_order_seq_cst``, Java volatile, and + the gcc-compatible ``__sync_*`` builtins which do not specify otherwise. + +Notes for frontends + If a frontend is exposing atomic operations, these are much easier to reason + about for the programmer than other kinds of operations, and using them is + generally a practical performance tradeoff. + +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. For + SequentiallyConsistent loads and stores, the same reorderings are allowed as + for Acquire loads and Release stores, except that SequentiallyConsistent + operations may not be reordered. + +Notes for code generation + SequentiallyConsistent loads minimally require the same barriers as Acquire + operations and SequentiallyConsistent stores require Release + barriers. Additionally, the code generator must enforce ordering between + SequentiallyConsistent stores followed by SequentiallyConsistent loads. This + is usually done by emitting either a full fence before the loads or a full + fence after the stores; which is preferred varies by architecture. 
+ +Atomics and IR optimization +=========================== + +Predicates for optimizer writers to query: + +* ``isSimple()``: A load or store which is not volatile or atomic. This is + what, for example, memcpyopt would check for operations it might transform. + +* ``isUnordered()``: A load or store which is not volatile and at most + Unordered. This would be checked, for example, by LICM before hoisting an + operation. + +* ``mayReadFromMemory()``/``mayWriteToMemory()``: Existing predicate, but note + that they return true for any operation which is volatile or at least + Monotonic. + +* Alias analysis: Note that AA will return ModRef for anything Acquire or + Release, and for the address accessed by any Monotonic operation. + +To support optimizing around atomic operations, make sure you are using the +right predicates; everything should work if that is done. If your pass should +optimize some atomic operations (Unordered operations in particular), make sure +it doesn't replace an atomic load or store with a non-atomic operation. + +Some examples of how optimizations interact with various kinds of atomic +operations: + +* ``memcpyopt``: An atomic operation cannot be optimized into part of a + memcpy/memset, including unordered loads/stores. It can pull operations + across some atomic operations. + +* LICM: Unordered loads/stores can be moved out of a loop. It just treats + monotonic operations like a read+write to a memory location, and anything + stricter than that like a nothrow call. + +* DSE: Unordered stores can be DSE'ed like normal stores. Monotonic stores can + be DSE'ed in some cases, but it's tricky to reason about, and not especially + important. + +* Folding a load: Any atomic load from a constant global can be constant-folded, + because it cannot be observed. Similar reasoning allows scalarrepl with + atomic loads and stores. 
+ +Atomics and Codegen +=================== + +Atomic operations are represented in the SelectionDAG with ``ATOMIC_*`` opcodes. +On architectures which use barrier instructions for all atomic ordering (like +ARM), appropriate fences are split out as the DAG is built. + +The MachineMemOperand for all atomic operations is currently marked as volatile; +this is not correct in the IR sense of volatile, but CodeGen handles anything +marked volatile very conservatively. This should get fixed at some point. + +Common architectures have some way of representing at least a pointer-sized +lock-free ``cmpxchg``; such an operation can be used to implement all the other +atomic operations which can be represented in IR up to that size. Backends are +expected to implement all those operations, but not operations which cannot be +implemented in a lock-free manner. It is expected that backends will give an +error when given an operation which cannot be implemented. (The LLVM code +generator is not very helpful here at the moment, but hopefully that will +change.) + +The implementation of atomics on LL/SC architectures (like ARM) is currently a +bit of a mess; there is a lot of copy-pasted code across targets, and the +representation is relatively unsuited to optimization (it would be nice to be +able to optimize loops involving cmpxchg etc.). + +On x86, all atomic loads generate a ``MOV``. SequentiallyConsistent stores +generate an ``XCHG``, other stores generate a ``MOV``. SequentiallyConsistent +fences generate an ``MFENCE``, other fences do not cause any code to be +generated. cmpxchg uses the ``LOCK CMPXCHG`` instruction. ``atomicrmw xchg`` +uses ``XCHG``, ``atomicrmw add`` and ``atomicrmw sub`` use ``XADD``, and all +other ``atomicrmw`` operations generate a loop with ``LOCK CMPXCHG``. Depending +on the users of the result, some ``atomicrmw`` operations can be translated into +operations like ``LOCK AND``, but that does not work in general. 
+ +On ARM, MIPS, and many other RISC architectures, Acquire, Release, and +SequentiallyConsistent semantics require barrier instructions for every such +operation. Loads and stores generate normal instructions. ``cmpxchg`` and +``atomicrmw`` can be represented using a loop with LL/SC-style instructions +which take some sort of exclusive lock on a cache line (``LDREX`` and ``STREX`` +on ARM, etc.). At the moment, the IR does not provide any way to represent a +weak ``cmpxchg`` which would not require a loop. diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html deleted file mode 100644 index 30145de581..0000000000 --- a/docs/BitCodeFormat.html +++ /dev/null @@ -1,1482 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Bitcode File Format</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> -<h1> LLVM Bitcode File Format</h1> -<ol> - <li><a href="#abstract">Abstract</a></li> - <li><a href="#overview">Overview</a></li> - <li><a href="#bitstream">Bitstream Format</a> - <ol> - <li><a href="#magic">Magic Numbers</a></li> - <li><a href="#primitives">Primitives</a></li> - <li><a href="#abbrevid">Abbreviation IDs</a></li> - <li><a href="#blocks">Blocks</a></li> - <li><a href="#datarecord">Data Records</a></li> - <li><a href="#abbreviations">Abbreviations</a></li> - <li><a href="#stdblocks">Standard Blocks</a></li> - </ol> - </li> - <li><a href="#wrapper">Bitcode Wrapper Format</a> - </li> - <li><a href="#llvmir">LLVM IR Encoding</a> - <ol> - <li><a href="#basics">Basics</a></li> - <li><a href="#MODULE_BLOCK">MODULE_BLOCK Contents</a></li> - <li><a href="#PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a></li> - <li><a href="#TYPE_BLOCK">TYPE_BLOCK Contents</a></li> - <li><a href="#CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a></li> - <li><a href="#FUNCTION_BLOCK">FUNCTION_BLOCK 
Contents</a></li> - <li><a href="#TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a></li> - <li><a href="#VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a></li> - <li><a href="#METADATA_BLOCK">METADATA_BLOCK Contents</a></li> - <li><a href="#METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a></li> - </ol> - </li> -</ol> -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>, - <a href="http://www.reverberate.org">Joshua Haberman</a>, - and <a href="mailto:housel@acm.org">Peter S. Housel</a>. -</p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="abstract">Abstract</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>This document describes the LLVM bitstream file format and the encoding of -the LLVM IR into it.</p> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="overview">Overview</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p> -What is commonly known as the LLVM bitcode file format (also, sometimes -anachronistically known as bytecode) is actually two things: a <a -href="#bitstream">bitstream container format</a> -and an <a href="#llvmir">encoding of LLVM IR</a> into the container format.</p> - -<p> -The bitstream format is an abstract encoding of structured data, very -similar to XML in some ways. Like XML, bitstream files contain tags, and nested -structures, and you can parse the file without having to understand the tags. 
-Unlike XML, the bitstream format is a binary encoding, and unlike XML it -provides a mechanism for the file to self-describe "abbreviations", which are -effectively size optimizations for the content.</p> - -<p>LLVM IR files may be optionally embedded into a <a -href="#wrapper">wrapper</a> structure that makes it easy to embed extra data -along with LLVM IR files.</p> - -<p>This document first describes the LLVM bitstream format, describes the -wrapper format, then describes the record structure used by LLVM IR files. -</p> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="bitstream">Bitstream Format</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p> -The bitstream format is literally a stream of bits, with a very simple -structure. This structure consists of the following concepts: -</p> - -<ul> -<li>A "<a href="#magic">magic number</a>" that identifies the contents of - the stream.</li> -<li>Encoding <a href="#primitives">primitives</a> like variable bit-rate - integers.</li> -<li><a href="#blocks">Blocks</a>, which define nested content.</li> -<li><a href="#datarecord">Data Records</a>, which describe entities within the - file.</li> -<li>Abbreviations, which specify compression optimizations for the file.</li> -</ul> - -<p>Note that the <a -href="CommandGuide/html/llvm-bcanalyzer.html">llvm-bcanalyzer</a> tool can be -used to dump and inspect arbitrary bitstreams, which is very useful for -understanding the encoding.</p> - -<!-- ======================================================================= --> -<h3> - <a name="magic">Magic Numbers</a> -</h3> - -<div> - -<p>The first two bytes of a bitcode file are 'BC' (0x42, 0x43). -The second two bytes are an application-specific magic number. 
Generic -bitcode tools can look at only the first two bytes to verify the file is -bitcode, while application-specific programs will want to look at all four.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="primitives">Primitives</a> -</h3> - -<div> - -<p> -A bitstream literally consists of a stream of bits, which are read in order -starting with the least significant bit of each byte. The stream is made up of a -number of primitive values that encode a stream of unsigned integer values. -These integers are encoded in two ways: either as <a href="#fixedwidth">Fixed -Width Integers</a> or as <a href="#variablewidth">Variable Width -Integers</a>. -</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="fixedwidth">Fixed Width Integers</a> -</h4> - -<div> - -<p>Fixed-width integer values have their low bits emitted directly to the file. - For example, a 3-bit integer value encodes 1 as 001. Fixed width integers - are used when there are a well-known number of options for a field. For - example, boolean values are usually encoded with a 1-bit wide integer. -</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="variablewidth">Variable Width Integers</a> -</h4> - -<div> - -<p>Variable-width integer (VBR) values encode values of arbitrary size, -optimizing for the case where the values are small. Given a 4-bit VBR field, -any 3-bit value (0 through 7) is encoded directly, with the high bit set to -zero. Values larger than N-1 bits emit their bits in a series of N-1 bit -chunks, where all but the last set the high bit.</p> - -<p>For example, the value 27 (0x1B) is encoded as 1011 0011 when emitted as a -vbr4 value. The first set of four bits indicates the value 3 (011) with a -continuation piece (indicated by a high bit of 1). The next word indicates a -value of 24 (011 << 3) with no continuation. 
The sum (3+24) yields the value -27. -</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="char6">6-bit characters</a></h4> - -<div> - -<p>6-bit characters encode common characters into a fixed 6-bit field. They -represent the following characters with the following 6-bit values:</p> - -<div class="doc_code"> -<pre> -'a' .. 'z' — 0 .. 25 -'A' .. 'Z' — 26 .. 51 -'0' .. '9' — 52 .. 61 - '.' — 62 - '_' — 63 -</pre> -</div> - -<p>This encoding is only suitable for encoding characters and strings that -consist only of the above characters. It is completely incapable of encoding -characters not in the set.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="wordalign">Word Alignment</a></h4> - -<div> - -<p>Occasionally, it is useful to emit zero bits until the bitstream is a -multiple of 32 bits. This ensures that the bit position in the stream can be -represented as a multiple of 32-bit words.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="abbrevid">Abbreviation IDs</a> -</h3> - -<div> - -<p> -A bitstream is a sequential series of <a href="#blocks">Blocks</a> and -<a href="#datarecord">Data Records</a>. Both of these start with an -abbreviation ID encoded as a fixed-bitwidth field. The width is specified by -the current block, as described below. The value of the abbreviation ID -specifies either a builtin ID (which have special meanings, defined below) or -one of the abbreviation IDs defined for the current block by the stream itself. 
-</p> - -<p> -The set of builtin abbrev IDs is: -</p> - -<ul> -<li><tt>0 - <a href="#END_BLOCK">END_BLOCK</a></tt> — This abbrev ID marks - the end of the current block.</li> -<li><tt>1 - <a href="#ENTER_SUBBLOCK">ENTER_SUBBLOCK</a></tt> — This - abbrev ID marks the beginning of a new block.</li> -<li><tt>2 - <a href="#DEFINE_ABBREV">DEFINE_ABBREV</a></tt> — This defines - a new abbreviation.</li> -<li><tt>3 - <a href="#UNABBREV_RECORD">UNABBREV_RECORD</a></tt> — This ID - specifies the definition of an unabbreviated record.</li> -</ul> - -<p>Abbreviation IDs 4 and above are defined by the stream itself, and specify -an <a href="#abbrev_records">abbreviated record encoding</a>.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="blocks">Blocks</a> -</h3> - -<div> - -<p> -Blocks in a bitstream denote nested regions of the stream, and are identified by -a content-specific id number (for example, LLVM IR uses an ID of 12 to represent -function bodies). Block IDs 0-7 are reserved for <a href="#stdblocks">standard blocks</a> -whose meaning is defined by Bitcode; block IDs 8 and greater are -application specific. Nested blocks capture the hierarchical structure of the data -encoded in it, and various properties are associated with blocks as the file is -parsed. Block definitions allow the reader to efficiently skip blocks -in constant time if the reader wants a summary of blocks, or if it wants to -efficiently skip data it does not understand. The LLVM IR reader uses this -mechanism to skip function bodies, lazily reading them on demand. -</p> - -<p> -When reading and encoding the stream, several properties are maintained for the -block. In particular, each block maintains: -</p> - -<ol> -<li>A current abbrev id width. This value starts at 2 at the beginning of - the stream, and is set every time a - block record is entered. 
The block entry specifies the abbrev id width for - the body of the block.</li> - -<li>A set of abbreviations. Abbreviations may be defined within a block, in - which case they are only defined in that block (neither subblocks nor - enclosing blocks see the abbreviation). Abbreviations can also be defined - inside a <tt><a href="#BLOCKINFO">BLOCKINFO</a></tt> block, in which case - they are defined in all blocks that match the ID that the BLOCKINFO block is - describing. -</li> -</ol> - -<p> -As sub blocks are entered, these properties are saved and the new sub-block has -its own set of abbreviations, and its own abbrev id width. When a sub-block is -popped, the saved values are restored. -</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="ENTER_SUBBLOCK">ENTER_SUBBLOCK Encoding</a></h4> - -<div> - -<p><tt>[ENTER_SUBBLOCK, blockid<sub>vbr8</sub>, newabbrevlen<sub>vbr4</sub>, - <align32bits>, blocklen<sub>32</sub>]</tt></p> - -<p> -The <tt>ENTER_SUBBLOCK</tt> abbreviation ID specifies the start of a new block -record. The <tt>blockid</tt> value is encoded as an 8-bit VBR identifier, and -indicates the type of block being entered, which can be -a <a href="#stdblocks">standard block</a> or an application-specific block. -The <tt>newabbrevlen</tt> value is a 4-bit VBR, which specifies the abbrev id -width for the sub-block. The <tt>blocklen</tt> value is a 32-bit aligned value -that specifies the size of the subblock in 32-bit words. This value allows the -reader to skip over the entire block in one jump. -</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="END_BLOCK">END_BLOCK Encoding</a></h4> - -<div> - -<p><tt>[END_BLOCK, <align32bits>]</tt></p> - -<p> -The <tt>END_BLOCK</tt> abbreviation ID specifies the end of the current block -record. Its end is aligned to 32-bits to ensure that the size of the block is -an even multiple of 32-bits. 
-</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="datarecord">Data Records</a> -</h3> - -<div> -<p> -Data records consist of a record code and a number of (up to) 64-bit -integer values. The interpretation of the code and values is -application specific and may vary between different block types. -Records can be encoded either using an unabbrev record, or with an -abbreviation. In the LLVM IR format, for example, there is a record -which encodes the target triple of a module. The code is -<tt>MODULE_CODE_TRIPLE</tt>, and the values of the record are the -ASCII codes for the characters in the string. -</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="UNABBREV_RECORD">UNABBREV_RECORD Encoding</a></h4> - -<div> - -<p><tt>[UNABBREV_RECORD, code<sub>vbr6</sub>, numops<sub>vbr6</sub>, - op0<sub>vbr6</sub>, op1<sub>vbr6</sub>, ...]</tt></p> - -<p> -An <tt>UNABBREV_RECORD</tt> provides a default fallback encoding, which is both -completely general and extremely inefficient. It can describe an arbitrary -record by emitting the code and operands as VBRs. -</p> - -<p> -For example, emitting an LLVM IR target triple as an unabbreviated record -requires emitting the <tt>UNABBREV_RECORD</tt> abbrevid, a vbr6 for the -<tt>MODULE_CODE_TRIPLE</tt> code, a vbr6 for the length of the string, which is -equal to the number of operands, and a vbr6 for each character. Because there -are no letters with values less than 32, each letter would need to be emitted as -at least a two-part VBR, which means that each letter would require at least 12 -bits. This is not an efficient encoding, but it is fully general. 
-</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="abbrev_records">Abbreviated Record Encoding</a></h4> - -<div> - -<p><tt>[<abbrevid>, fields...]</tt></p> - -<p> -An abbreviated record is a abbreviation id followed by a set of fields that are -encoded according to the <a href="#abbreviations">abbreviation definition</a>. -This allows records to be encoded significantly more densely than records -encoded with the <tt><a href="#UNABBREV_RECORD">UNABBREV_RECORD</a></tt> type, -and allows the abbreviation types to be specified in the stream itself, which -allows the files to be completely self describing. The actual encoding of -abbreviations is defined below. -</p> - -<p>The record code, which is the first field of an abbreviated record, -may be encoded in the abbreviation definition (as a literal -operand) or supplied in the abbreviated record (as a Fixed or VBR -operand value).</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="abbreviations">Abbreviations</a> -</h3> - -<div> -<p> -Abbreviations are an important form of compression for bitstreams. The idea is -to specify a dense encoding for a class of records once, then use that encoding -to emit many records. It takes space to emit the encoding into the file, but -the space is recouped (hopefully plus some) when the records that use it are -emitted. -</p> - -<p> -Abbreviations can be determined dynamically per client, per file. Because the -abbreviations are stored in the bitstream itself, different streams of the same -format can contain different sets of abbreviations according to the needs -of the specific stream. -As a concrete example, LLVM IR files usually emit an abbreviation -for binary operators. If a specific LLVM module contained no or few binary -operators, the abbreviation does not need to be emitted. 
-</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="DEFINE_ABBREV">DEFINE_ABBREV Encoding</a></h4> - -<div> - -<p><tt>[DEFINE_ABBREV, numabbrevops<sub>vbr5</sub>, abbrevop0, abbrevop1, - ...]</tt></p> - -<p> -A <tt>DEFINE_ABBREV</tt> record adds an abbreviation to the list of currently -defined abbreviations in the scope of this block. This definition only exists -inside this immediate block — it is not visible in subblocks or enclosing -blocks. Abbreviations are implicitly assigned IDs sequentially starting from 4 -(the first application-defined abbreviation ID). Any abbreviations defined in a -<tt>BLOCKINFO</tt> record for the particular block type -receive IDs first, in order, followed by any -abbreviations defined within the block itself. Abbreviated data records -reference this ID to indicate what abbreviation they are invoking. -</p> - -<p> -An abbreviation definition consists of the <tt>DEFINE_ABBREV</tt> abbrevid -followed by a VBR that specifies the number of abbrev operands, then the abbrev -operands themselves. Abbreviation operands come in three forms. They all start -with a single bit that indicates whether the abbrev operand is a literal operand -(when the bit is 1) or an encoding operand (when the bit is 0). -</p> - -<ol> -<li>Literal operands — <tt>[1<sub>1</sub>, litvalue<sub>vbr8</sub>]</tt> -— Literal operands specify that the value in the result is always a single -specific value. This specific value is emitted as a vbr8 after the bit -indicating that it is a literal operand.</li> -<li>Encoding info without data — <tt>[0<sub>1</sub>, - encoding<sub>3</sub>]</tt> — Operand encodings that do not have extra - data are just emitted as their code. -</li> -<li>Encoding info with data — <tt>[0<sub>1</sub>, encoding<sub>3</sub>, -value<sub>vbr5</sub>]</tt> — Operand encodings that do have extra data are -emitted as their code, followed by the extra data. 
-</li> -</ol> - -<p>The possible operand encodings are:</p> - -<ul> -<li>Fixed (code 1): The field should be emitted as - a <a href="#fixedwidth">fixed-width value</a>, whose width is specified by - the operand's extra data.</li> -<li>VBR (code 2): The field should be emitted as - a <a href="#variablewidth">variable-width value</a>, whose width is - specified by the operand's extra data.</li> -<li>Array (code 3): This field is an array of values. The array operand - has no extra data, but expects another operand to follow it, indicating - the element type of the array. When reading an array in an abbreviated - record, the first integer is a vbr6 that indicates the array length, - followed by the encoded elements of the array. An array may only occur as - the last operand of an abbreviation (except for the one final operand that - gives the array's type).</li> -<li>Char6 (code 4): This field should be emitted as - a <a href="#char6">char6-encoded value</a>. This operand type takes no - extra data. Char6 encoding is normally used as an array element type. - </li> -<li>Blob (code 5): This field is emitted as a vbr6, followed by padding to a - 32-bit boundary (for alignment) and an array of 8-bit objects. The array of - bytes is further followed by tail padding to ensure that its total length is - a multiple of 4 bytes. This makes it very efficient for the reader to - decode the data without having to make a copy of it: it can use a pointer to - the data in the mapped in file and poke directly at it. A blob may only - occur as the last operand of an abbreviation.</li> -</ul> - -<p> -For example, target triples in LLVM modules are encoded as a record of the -form <tt>[TRIPLE, 'a', 'b', 'c', 'd']</tt>. 
Consider if the bitstream emitted -the following abbrev entry: -</p> - -<div class="doc_code"> -<pre> -[0, Fixed, 4] -[0, Array] -[0, Char6] -</pre> -</div> - -<p> -When emitting a record with this abbreviation, the above entry would be emitted -as: -</p> - -<div class="doc_code"> -<p> -<tt>[4<sub>abbrevwidth</sub>, 2<sub>4</sub>, 4<sub>vbr6</sub>, 0<sub>6</sub>, -1<sub>6</sub>, 2<sub>6</sub>, 3<sub>6</sub>]</tt> -</p> -</div> - -<p>These values are:</p> - -<ol> -<li>The first value, 4, is the abbreviation ID for this abbreviation.</li> -<li>The second value, 2, is the record code for <tt>TRIPLE</tt> records within LLVM IR file <tt>MODULE_BLOCK</tt> blocks.</li> -<li>The third value, 4, is the length of the array.</li> -<li>The rest of the values are the char6 encoded values - for <tt>"abcd"</tt>.</li> -</ol> - -<p> -With this abbreviation, the triple is emitted with only 37 bits (assuming a -abbrev id width of 3). Without the abbreviation, significantly more space would -be required to emit the target triple. Also, because the <tt>TRIPLE</tt> value -is not emitted as a literal in the abbreviation, the abbreviation can also be -used for any other string value. -</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="stdblocks">Standard Blocks</a> -</h3> - -<div> - -<p> -In addition to the basic block structure and record encodings, the bitstream -also defines specific built-in block types. These block types specify how the -stream is to be decoded or other metadata. In the future, new standard blocks -may be added. Block IDs 0-7 are reserved for standard blocks. -</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="BLOCKINFO">#0 - BLOCKINFO Block</a></h4> - -<div> - -<p> -The <tt>BLOCKINFO</tt> block allows the description of metadata for other -blocks. 
The currently specified records are: -</p> - -<div class="doc_code"> -<pre> -[SETBID (#1), blockid] -[DEFINE_ABBREV, ...] -[BLOCKNAME, ...name...] -[SETRECORDNAME, RecordID, ...name...] -</pre> -</div> - -<p> -The <tt>SETBID</tt> record (code 1) indicates which block ID is being -described. <tt>SETBID</tt> records can occur multiple times throughout the -block to change which block ID is being described. There must be -a <tt>SETBID</tt> record prior to any other records. -</p> - -<p> -Standard <tt>DEFINE_ABBREV</tt> records can occur inside <tt>BLOCKINFO</tt> -blocks, but unlike their occurrence in normal blocks, the abbreviation is -defined for blocks matching the block ID we are describing, <i>not</i> the -<tt>BLOCKINFO</tt> block itself. The abbreviations defined -in <tt>BLOCKINFO</tt> blocks receive abbreviation IDs as described -in <tt><a href="#DEFINE_ABBREV">DEFINE_ABBREV</a></tt>. -</p> - -<p>The <tt>BLOCKNAME</tt> record (code 2) can optionally occur in this block. The elements of -the record are the bytes of the string name of the block. llvm-bcanalyzer can use -this to dump out bitcode files symbolically.</p> - -<p>The <tt>SETRECORDNAME</tt> record (code 3) can also optionally occur in this block. The -first operand value is a record ID number, and the rest of the elements of the record are -the bytes for the string name of the record. llvm-bcanalyzer can use -this to dump out bitcode files symbolically.</p> - -<p> -Note that although the data in <tt>BLOCKINFO</tt> blocks is described as -"metadata," the abbreviations they contain are essential for parsing records -from the corresponding blocks. It is not safe to skip them. 
-</p> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="wrapper">Bitcode Wrapper Format</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p> -Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper -structure. This structure contains a simple header that indicates the offset -and size of the embedded BC file. This allows additional information to be -stored alongside the BC file. The structure of this file header is: -</p> - -<div class="doc_code"> -<p> -<tt>[Magic<sub>32</sub>, Version<sub>32</sub>, Offset<sub>32</sub>, -Size<sub>32</sub>, CPUType<sub>32</sub>]</tt> -</p> -</div> - -<p> -Each of the fields are 32-bit fields stored in little endian form (as with -the rest of the bitcode file fields). The Magic number is always -<tt>0x0B17C0DE</tt> and the version is currently always <tt>0</tt>. The Offset -field is the offset in bytes to the start of the bitcode stream in the file, and -the Size field is the size in bytes of the stream. CPUType is a target-specific -value that can be used to encode the CPU of the target. -</p> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="llvmir">LLVM IR Encoding</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p> -LLVM IR is encoded into a bitstream by defining blocks and records. It uses -blocks for things like constant pools, functions, symbol tables, etc. It uses -records for things like instructions, global variable descriptors, type -descriptions, etc. This document does not describe the set of abbreviations -that the writer uses, as these are fully self-described in the file, and the -reader is not allowed to build in any knowledge of this. 
-</p> - -<!-- ======================================================================= --> -<h3> - <a name="basics">Basics</a> -</h3> - -<div> - -<!-- _______________________________________________________________________ --> -<h4><a name="ir_magic">LLVM IR Magic Number</a></h4> - -<div> - -<p> -The magic number for LLVM IR files is: -</p> - -<div class="doc_code"> -<p> -<tt>[0x0<sub>4</sub>, 0xC<sub>4</sub>, 0xE<sub>4</sub>, 0xD<sub>4</sub>]</tt> -</p> -</div> - -<p> -When combined with the bitcode magic number and viewed as bytes, this is -<tt>"BC 0xC0DE"</tt>. -</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="ir_signed_vbr">Signed VBRs</a></h4> - -<div> - -<p> -<a href="#variablewidth">Variable Width Integer</a> encoding is an efficient way to -encode arbitrary sized unsigned values, but is an extremely inefficient for -encoding signed values, as signed values are otherwise treated as maximally large -unsigned values. -</p> - -<p> -As such, signed VBR values of a specific width are emitted as follows: -</p> - -<ul> -<li>Positive values are emitted as VBRs of the specified width, but with their - value shifted left by one.</li> -<li>Negative values are emitted as VBRs of the specified width, but the negated - value is shifted left by one, and the low bit is set.</li> -</ul> - -<p> -With this encoding, small positive and small negative values can both -be emitted efficiently. Signed VBR encoding is used in -<tt>CST_CODE_INTEGER</tt> and <tt>CST_CODE_WIDE_INTEGER</tt> records -within <tt>CONSTANTS_BLOCK</tt> blocks. 
-</p> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4><a name="ir_blocks">LLVM IR Blocks</a></h4> - -<div> - -<p> -LLVM IR is defined with the following blocks: -</p> - -<ul> -<li>8 — <a href="#MODULE_BLOCK"><tt>MODULE_BLOCK</tt></a> — This is the top-level block that - contains the entire module, and describes a variety of per-module - information.</li> -<li>9 — <a href="#PARAMATTR_BLOCK"><tt>PARAMATTR_BLOCK</tt></a> — This enumerates the parameter - attributes.</li> -<li>10 — <a href="#TYPE_BLOCK"><tt>TYPE_BLOCK</tt></a> — This describes all of the types in - the module.</li> -<li>11 — <a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a> — This describes constants for a - module or function.</li> -<li>12 — <a href="#FUNCTION_BLOCK"><tt>FUNCTION_BLOCK</tt></a> — This describes a function - body.</li> -<li>13 — <a href="#TYPE_SYMTAB_BLOCK"><tt>TYPE_SYMTAB_BLOCK</tt></a> — This describes the type symbol - table.</li> -<li>14 — <a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a> — This describes a value symbol - table.</li> -<li>15 — <a href="#METADATA_BLOCK"><tt>METADATA_BLOCK</tt></a> — This describes metadata items.</li> -<li>16 — <a href="#METADATA_ATTACHMENT"><tt>METADATA_ATTACHMENT</tt></a> — This contains records associating metadata with function instruction values.</li> -</ul> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="MODULE_BLOCK">MODULE_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>MODULE_BLOCK</tt> block (id 8) is the top-level block for LLVM -bitcode files, and each bitcode file must contain exactly one. 
In -addition to records (described below) containing information -about the module, a <tt>MODULE_BLOCK</tt> block may contain the -following sub-blocks: -</p> - -<ul> -<li><a href="#BLOCKINFO"><tt>BLOCKINFO</tt></a></li> -<li><a href="#PARAMATTR_BLOCK"><tt>PARAMATTR_BLOCK</tt></a></li> -<li><a href="#TYPE_BLOCK"><tt>TYPE_BLOCK</tt></a></li> -<li><a href="#TYPE_SYMTAB_BLOCK"><tt>TYPE_SYMTAB_BLOCK</tt></a></li> -<li><a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a></li> -<li><a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a></li> -<li><a href="#FUNCTION_BLOCK"><tt>FUNCTION_BLOCK</tt></a></li> -<li><a href="#METADATA_BLOCK"><tt>METADATA_BLOCK</tt></a></li> -</ul> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_VERSION">MODULE_CODE_VERSION Record</a></h4> - -<div> - -<p><tt>[VERSION, version#]</tt></p> - -<p>The <tt>VERSION</tt> record (code 1) contains a single value -indicating the format version. Only version 0 is supported at this -time.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_TRIPLE">MODULE_CODE_TRIPLE Record</a></h4> - -<div> -<p><tt>[TRIPLE, ...string...]</tt></p> - -<p>The <tt>TRIPLE</tt> record (code 2) contains a variable number of -values representing the bytes of the <tt>target triple</tt> -specification string.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_DATALAYOUT">MODULE_CODE_DATALAYOUT Record</a></h4> - -<div> -<p><tt>[DATALAYOUT, ...string...]</tt></p> - -<p>The <tt>DATALAYOUT</tt> record (code 3) contains a variable number of -values representing the bytes of the <tt>target datalayout</tt> -specification string.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_ASM">MODULE_CODE_ASM Record</a></h4> - -<div> -<p><tt>[ASM, ...string...]</tt></p> - 
-<p>The <tt>ASM</tt> record (code 4) contains a variable number of -values representing the bytes of <tt>module asm</tt> strings, with -individual assembly blocks separated by newline (ASCII 10) characters.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME Record</a></h4> - -<div> -<p><tt>[SECTIONNAME, ...string...]</tt></p> - -<p>The <tt>SECTIONNAME</tt> record (code 5) contains a variable number -of values representing the bytes of a single section name -string. There should be one <tt>SECTIONNAME</tt> record for each -section name referenced (e.g., in global variable or function -<tt>section</tt> attributes) within the module. These records can be -referenced by the 1-based index in the <i>section</i> fields of -<tt>GLOBALVAR</tt> or <tt>FUNCTION</tt> records.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_DEPLIB">MODULE_CODE_DEPLIB Record</a></h4> - -<div> -<p><tt>[DEPLIB, ...string...]</tt></p> - -<p>The <tt>DEPLIB</tt> record (code 6) contains a variable number of -values representing the bytes of a single dependent library name -string, one of the libraries mentioned in a <tt>deplibs</tt> -declaration. There should be one <tt>DEPLIB</tt> record for each -library name referenced.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_GLOBALVAR">MODULE_CODE_GLOBALVAR Record</a></h4> - -<div> -<p><tt>[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal]</tt></p> - -<p>The <tt>GLOBALVAR</tt> record (code 7) marks the declaration or -definition of a global variable. 
The operand fields are:</p> - -<ul> -<li><i>pointer type</i>: The type index of the pointer type used to point to -this global variable</li> - -<li><i>isconst</i>: Non-zero if the variable is treated as constant within -the module, or zero if it is not</li> - -<li><i>initid</i>: If non-zero, the value index of the initializer for this -variable, plus 1.</li> - -<li><a name="linkage"><i>linkage</i></a>: An encoding of the linkage -type for this variable: - <ul> - <li><tt>external</tt>: code 0</li> - <li><tt>weak</tt>: code 1</li> - <li><tt>appending</tt>: code 2</li> - <li><tt>internal</tt>: code 3</li> - <li><tt>linkonce</tt>: code 4</li> - <li><tt>dllimport</tt>: code 5</li> - <li><tt>dllexport</tt>: code 6</li> - <li><tt>extern_weak</tt>: code 7</li> - <li><tt>common</tt>: code 8</li> - <li><tt>private</tt>: code 9</li> - <li><tt>weak_odr</tt>: code 10</li> - <li><tt>linkonce_odr</tt>: code 11</li> - <li><tt>available_externally</tt>: code 12</li> - <li><tt>linker_private</tt>: code 13</li> - </ul> -</li> - -<li><i>alignment</i>: The logarithm base 2 of the variable's requested -alignment, plus 1</li> - -<li><i>section</i>: If non-zero, the 1-based section index in the -table of <a href="#MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME</a> -entries.</li> - -<li><a name="visibility"><i>visibility</i></a>: If present, an -encoding of the visibility of this variable: - <ul> - <li><tt>default</tt>: code 0</li> - <li><tt>hidden</tt>: code 1</li> - <li><tt>protected</tt>: code 2</li> - </ul> -</li> - -<li><i>threadlocal</i>: If present and non-zero, indicates that the variable -is <tt>thread_local</tt></li> - -<li><i>unnamed_addr</i>: If present and non-zero, indicates that the variable -has <tt>unnamed_addr</tt></li> - -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_FUNCTION">MODULE_CODE_FUNCTION Record</a></h4> - -<div> - -<p><tt>[FUNCTION, type, callingconv, isproto, linkage, paramattr, 
alignment, section, visibility, gc]</tt></p> - -<p>The <tt>FUNCTION</tt> record (code 8) marks the declaration or -definition of a function. The operand fields are:</p> - -<ul> -<li><i>type</i>: The type index of the function type describing this function</li> - -<li><i>callingconv</i>: The calling convention number: - <ul> - <li><tt>ccc</tt>: code 0</li> - <li><tt>fastcc</tt>: code 8</li> - <li><tt>coldcc</tt>: code 9</li> - <li><tt>x86_stdcallcc</tt>: code 64</li> - <li><tt>x86_fastcallcc</tt>: code 65</li> - <li><tt>arm_apcscc</tt>: code 66</li> - <li><tt>arm_aapcscc</tt>: code 67</li> - <li><tt>arm_aapcs_vfpcc</tt>: code 68</li> - </ul> -</li> - -<li><i>isproto</i>: Non-zero if this entry represents a declaration -rather than a definition</li> - -<li><i>linkage</i>: An encoding of the <a href="#linkage">linkage type</a> -for this function</li> - -<li><i>paramattr</i>: If nonzero, the 1-based parameter attribute index -into the table of <a href="#PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY</a> -entries.</li> - -<li><i>alignment</i>: The logarithm base 2 of the function's requested -alignment, plus 1</li> - -<li><i>section</i>: If non-zero, the 1-based section index in the -table of <a href="#MODULE_CODE_SECTIONNAME">MODULE_CODE_SECTIONNAME</a> -entries.</li> - -<li><i>visibility</i>: An encoding of the <a href="#visibility">visibility</a> - of this function</li> - -<li><i>gc</i>: If present and nonzero, the 1-based garbage collector -index in the table of -<a href="#MODULE_CODE_GCNAME">MODULE_CODE_GCNAME</a> entries.</li> - -<li><i>unnamed_addr</i>: If present and non-zero, indicates that the function -has <tt>unnamed_addr</tt></li> - -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_ALIAS">MODULE_CODE_ALIAS Record</a></h4> - -<div> - -<p><tt>[ALIAS, alias type, aliasee val#, linkage, visibility]</tt></p> - -<p>The <tt>ALIAS</tt> record (code 9) marks the definition of an -alias. 
The operand fields are</p> - -<ul> -<li><i>alias type</i>: The type index of the alias</li> - -<li><i>aliasee val#</i>: The value index of the aliased value</li> - -<li><i>linkage</i>: An encoding of the <a href="#linkage">linkage type</a> -for this alias</li> - -<li><i>visibility</i>: If present, an encoding of the -<a href="#visibility">visibility</a> of the alias</li> - -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_PURGEVALS">MODULE_CODE_PURGEVALS Record</a></h4> - -<div> -<p><tt>[PURGEVALS, numvals]</tt></p> - -<p>The <tt>PURGEVALS</tt> record (code 10) resets the module-level -value list to the size given by the single operand value. Module-level -value list items are added by <tt>GLOBALVAR</tt>, <tt>FUNCTION</tt>, -and <tt>ALIAS</tt> records. After a <tt>PURGEVALS</tt> record is seen, -new value indices will start from the given <i>numvals</i> value.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="MODULE_CODE_GCNAME">MODULE_CODE_GCNAME Record</a></h4> - -<div> -<p><tt>[GCNAME, ...string...]</tt></p> - -<p>The <tt>GCNAME</tt> record (code 11) contains a variable number of -values representing the bytes of a single garbage collector name -string. There should be one <tt>GCNAME</tt> record for each garbage -collector name referenced in function <tt>gc</tt> attributes within -the module. These records can be referenced by 1-based index in the <i>gc</i> -fields of <tt>FUNCTION</tt> records.</p> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="PARAMATTR_BLOCK">PARAMATTR_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>PARAMATTR_BLOCK</tt> block (id 9) contains a table of -entries describing the attributes of function parameters. 
These -entries are referenced by 1-based index in the <i>paramattr</i> field -of module block <a href="#MODULE_CODE_FUNCTION"><tt>FUNCTION</tt></a> -records, or within the <i>attr</i> field of function block <a -href="#FUNC_CODE_INST_INVOKE"><tt>INST_INVOKE</tt></a> and <a -href="#FUNC_CODE_INST_CALL"><tt>INST_CALL</tt></a> records.</p> - -<p>Entries within <tt>PARAMATTR_BLOCK</tt> are constructed to ensure -that each is unique (i.e., no two indices represent equivalent -attribute lists). </p> - -<!-- _______________________________________________________________________ --> -<h4><a name="PARAMATTR_CODE_ENTRY">PARAMATTR_CODE_ENTRY Record</a></h4> - -<div> - -<p><tt>[ENTRY, paramidx0, attr0, paramidx1, attr1...]</tt></p> - -<p>The <tt>ENTRY</tt> record (code 1) contains an even number of -values describing a unique set of function parameter attributes. Each -<i>paramidx</i> value indicates which set of attributes is -represented, with 0 representing the return value attributes, -0xFFFFFFFF representing function attributes, and other values -representing 1-based function parameters. 
Each <i>attr</i> value is a -bitmap with the following interpretation: -</p> - -<ul> -<li>bit 0: <tt>zeroext</tt></li> -<li>bit 1: <tt>signext</tt></li> -<li>bit 2: <tt>noreturn</tt></li> -<li>bit 3: <tt>inreg</tt></li> -<li>bit 4: <tt>sret</tt></li> -<li>bit 5: <tt>nounwind</tt></li> -<li>bit 6: <tt>noalias</tt></li> -<li>bit 7: <tt>byval</tt></li> -<li>bit 8: <tt>nest</tt></li> -<li>bit 9: <tt>readnone</tt></li> -<li>bit 10: <tt>readonly</tt></li> -<li>bit 11: <tt>noinline</tt></li> -<li>bit 12: <tt>alwaysinline</tt></li> -<li>bit 13: <tt>optsize</tt></li> -<li>bit 14: <tt>ssp</tt></li> -<li>bit 15: <tt>sspreq</tt></li> -<li>bits 16–31: <tt>align <var>n</var></tt></li> -<li>bit 32: <tt>nocapture</tt></li> -<li>bit 33: <tt>noredzone</tt></li> -<li>bit 34: <tt>noimplicitfloat</tt></li> -<li>bit 35: <tt>naked</tt></li> -<li>bit 36: <tt>inlinehint</tt></li> -<li>bits 37–39: <tt>alignstack <var>n</var></tt>, represented as -the logarithm base 2 of the requested alignment, plus 1</li> -</ul> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="TYPE_BLOCK">TYPE_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>TYPE_BLOCK</tt> block (id 10) contains records which -constitute a table of type operator entries used to represent types -referenced within an LLVM module. Each record (with the exception of -<a href="#TYPE_CODE_NUMENTRY"><tt>NUMENTRY</tt></a>) generates a -single type table entry, which may be referenced by 0-based index from -instructions, constants, metadata, type symbol table entries, or other -type operator records. -</p> - -<p>Entries within <tt>TYPE_BLOCK</tt> are constructed to ensure that -each entry is unique (i.e., no two indices represent structurally -equivalent types). 
</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_NUMENTRY">TYPE_CODE_NUMENTRY Record</a></h4> - -<div> - -<p><tt>[NUMENTRY, numentries]</tt></p> - -<p>The <tt>NUMENTRY</tt> record (code 1) contains a single value which -indicates the total number of type code entries in the type table of -the module. If present, <tt>NUMENTRY</tt> should be the first record -in the block. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_VOID">TYPE_CODE_VOID Record</a></h4> - -<div> - -<p><tt>[VOID]</tt></p> - -<p>The <tt>VOID</tt> record (code 2) adds a <tt>void</tt> type to the -type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_HALF">TYPE_CODE_HALF Record</a></h4> - -<div> - -<p><tt>[HALF]</tt></p> - -<p>The <tt>HALF</tt> record (code 10) adds a <tt>half</tt> (16-bit -floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_FLOAT">TYPE_CODE_FLOAT Record</a></h4> - -<div> - -<p><tt>[FLOAT]</tt></p> - -<p>The <tt>FLOAT</tt> record (code 3) adds a <tt>float</tt> (32-bit -floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_DOUBLE">TYPE_CODE_DOUBLE Record</a></h4> - -<div> - -<p><tt>[DOUBLE]</tt></p> - -<p>The <tt>DOUBLE</tt> record (code 4) adds a <tt>double</tt> (64-bit -floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_LABEL">TYPE_CODE_LABEL Record</a></h4> - -<div> - -<p><tt>[LABEL]</tt></p> - -<p>The <tt>LABEL</tt> record (code 5) adds a <tt>label</tt> type to -the type table. 
-</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_OPAQUE">TYPE_CODE_OPAQUE Record</a></h4> - -<div> - -<p><tt>[OPAQUE]</tt></p> - -<p>The <tt>OPAQUE</tt> record (code 6) adds an <tt>opaque</tt> type to -the type table. Note that distinct <tt>opaque</tt> types are not -unified. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_INTEGER">TYPE_CODE_INTEGER Record</a></h4> - -<div> - -<p><tt>[INTEGER, width]</tt></p> - -<p>The <tt>INTEGER</tt> record (code 7) adds an integer type to the -type table. The single <i>width</i> field indicates the width of the -integer type. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_POINTER">TYPE_CODE_POINTER Record</a></h4> - -<div> - -<p><tt>[POINTER, pointee type, address space]</tt></p> - -<p>The <tt>POINTER</tt> record (code 8) adds a pointer type to the -type table. The operand fields are</p> - -<ul> -<li><i>pointee type</i>: The type index of the pointed-to type</li> - -<li><i>address space</i>: If supplied, the target-specific numbered -address space where the pointed-to object resides. Otherwise, the -default address space is zero. -</li> -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_FUNCTION">TYPE_CODE_FUNCTION Record</a></h4> - -<div> - -<p><tt>[FUNCTION, vararg, ignored, retty, ...paramty... ]</tt></p> - -<p>The <tt>FUNCTION</tt> record (code 9) adds a function type to the -type table. 
The operand fields are</p> - -<ul> -<li><i>vararg</i>: Non-zero if the type represents a varargs function</li> - -<li><i>ignored</i>: This value field is present for backward -compatibility only, and is ignored</li> - -<li><i>retty</i>: The type index of the function's return type</li> - -<li><i>paramty</i>: Zero or more type indices representing the -parameter types of the function</li> -</ul> - -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_STRUCT">TYPE_CODE_STRUCT Record</a></h4> - -<div> - -<p><tt>[STRUCT, ispacked, ...eltty...]</tt></p> - -<p>The <tt>STRUCT </tt> record (code 10) adds a struct type to the -type table. The operand fields are</p> - -<ul> -<li><i>ispacked</i>: Non-zero if the type represents a packed structure</li> - -<li><i>eltty</i>: Zero or more type indices representing the element -types of the structure</li> -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_ARRAY">TYPE_CODE_ARRAY Record</a></h4> - -<div> - -<p><tt>[ARRAY, numelts, eltty]</tt></p> - -<p>The <tt>ARRAY</tt> record (code 11) adds an array type to the type -table. The operand fields are</p> - -<ul> -<li><i>numelts</i>: The number of elements in arrays of this type</li> - -<li><i>eltty</i>: The type index of the array element type</li> -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_VECTOR">TYPE_CODE_VECTOR Record</a></h4> - -<div> - -<p><tt>[VECTOR, numelts, eltty]</tt></p> - -<p>The <tt>VECTOR</tt> record (code 12) adds a vector type to the type -table. 
The operand fields are</p> - -<ul> -<li><i>numelts</i>: The number of elements in vectors of this type</li> - -<li><i>eltty</i>: The type index of the vector element type</li> -</ul> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_X86_FP80">TYPE_CODE_X86_FP80 Record</a></h4> - -<div> - -<p><tt>[X86_FP80]</tt></p> - -<p>The <tt>X86_FP80</tt> record (code 13) adds an <tt>x86_fp80</tt> (80-bit -floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_FP128">TYPE_CODE_FP128 Record</a></h4> - -<div> - -<p><tt>[FP128]</tt></p> - -<p>The <tt>FP128</tt> record (code 14) adds an <tt>fp128</tt> (128-bit -floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_PPC_FP128">TYPE_CODE_PPC_FP128 Record</a></h4> - -<div> - -<p><tt>[PPC_FP128]</tt></p> - -<p>The <tt>PPC_FP128</tt> record (code 15) adds a <tt>ppc_fp128</tt> -(128-bit floating point) type to the type table. -</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4><a name="TYPE_CODE_METADATA">TYPE_CODE_METADATA Record</a></h4> - -<div> - -<p><tt>[METADATA]</tt></p> - -<p>The <tt>METADATA</tt> record (code 16) adds a <tt>metadata</tt> -type to the type table. -</p> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>CONSTANTS_BLOCK</tt> block (id 11) ... -</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="FUNCTION_BLOCK">FUNCTION_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>FUNCTION_BLOCK</tt> block (id 12) ... 
-</p> - -<p>In addition to the record types described below, a -<tt>FUNCTION_BLOCK</tt> block may contain the following sub-blocks: -</p> - -<ul> -<li><a href="#CONSTANTS_BLOCK"><tt>CONSTANTS_BLOCK</tt></a></li> -<li><a href="#VALUE_SYMTAB_BLOCK"><tt>VALUE_SYMTAB_BLOCK</tt></a></li> -<li><a href="#METADATA_ATTACHMENT"><tt>METADATA_ATTACHMENT</tt></a></li> -</ul> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="TYPE_SYMTAB_BLOCK">TYPE_SYMTAB_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>TYPE_SYMTAB_BLOCK</tt> block (id 13) contains entries which -map between module-level named types and their corresponding type -indices. -</p> - -<!-- _______________________________________________________________________ --> -<h4><a name="TST_CODE_ENTRY">TST_CODE_ENTRY Record</a></h4> - -<div> - -<p><tt>[ENTRY, typeid, ...string...]</tt></p> - -<p>The <tt>ENTRY</tt> record (code 1) contains a variable number of -values, with the first giving the type index of the designated type, -and the remaining values giving the character codes of the type -name. Each entry corresponds to a single named type. -</p> -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="VALUE_SYMTAB_BLOCK">VALUE_SYMTAB_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>VALUE_SYMTAB_BLOCK</tt> block (id 14) ... -</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="METADATA_BLOCK">METADATA_BLOCK Contents</a> -</h3> - -<div> - -<p>The <tt>METADATA_BLOCK</tt> block (id 15) ... -</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="METADATA_ATTACHMENT">METADATA_ATTACHMENT Contents</a> -</h3> - -<div> - -<p>The <tt>METADATA_ATTACHMENT</tt> block (id 16) ... 
-</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<hr> -<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> -<a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> -<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> -Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst new file mode 100644 index 0000000000..d3995e7036 --- /dev/null +++ b/docs/BitCodeFormat.rst @@ -0,0 +1,1045 @@ +.. _bitcode_format: + +.. role:: raw-html(raw) + :format: html + +======================== +LLVM Bitcode File Format +======================== + +.. contents:: + :local: + +Abstract +======== + +This document describes the LLVM bitstream file format and the encoding of the +LLVM IR into it. + +Overview +======== + +What is commonly known as the LLVM bitcode file format (also, sometimes +anachronistically known as bytecode) is actually two things: a `bitstream +container format`_ and an `encoding of LLVM IR`_ into the container format. + +The bitstream format is an abstract encoding of structured data, very similar to +XML in some ways. Like XML, bitstream files contain tags, and nested +structures, and you can parse the file without having to understand the tags. +Unlike XML, the bitstream format is a binary encoding, and unlike XML it +provides a mechanism for the file to self-describe "abbreviations", which are +effectively size optimizations for the content. + +LLVM IR files may be optionally embedded into a `wrapper`_ structure that makes +it easy to embed extra data along with LLVM IR files. 
+ +This document first describes the LLVM bitstream format, describes the wrapper +format, then describes the record structure used by LLVM IR files. + +.. _bitstream container format: + +Bitstream Format +================ + +The bitstream format is literally a stream of bits, with a very simple +structure. This structure consists of the following concepts: + +* A "`magic number`_" that identifies the contents of the stream. + +* Encoding `primitives`_ like variable bit-rate integers. + +* `Blocks`_, which define nested content. + +* `Data Records`_, which describe entities within the file. + +* Abbreviations, which specify compression optimizations for the file. + +Note that the `llvm-bcanalyzer <CommandGuide/html/llvm-bcanalyzer.html>`_ tool +can be used to dump and inspect arbitrary bitstreams, which is very useful for +understanding the encoding. + +.. _magic number: + +Magic Numbers +------------- + +The first two bytes of a bitcode file are 'BC' (``0x42``, ``0x43``). The second +two bytes are an application-specific magic number. Generic bitcode tools can +look at only the first two bytes to verify the file is bitcode, while +application-specific programs will want to look at all four. + +.. _primitives: + +Primitives +---------- + +A bitstream literally consists of a stream of bits, which are read in order +starting with the least significant bit of each byte. The stream is made up of +a number of primitive values that encode a stream of unsigned integer values. +These integers are encoded in two ways: either as `Fixed Width Integers`_ or as +`Variable Width Integers`_. + +.. _Fixed Width Integers: +.. _fixed-width value: + +Fixed Width Integers +^^^^^^^^^^^^^^^^^^^^ + +Fixed-width integer values have their low bits emitted directly to the file. +For example, a 3-bit integer value encodes 1 as 001. Fixed width integers are +used when there are a well-known number of options for a field. 
For example, +boolean values are usually encoded with a 1-bit wide integer. + +.. _Variable Width Integers: +.. _Variable Width Integer: +.. _variable-width value: + +Variable Width Integers +^^^^^^^^^^^^^^^^^^^^^^^ + +Variable-width integer (VBR) values encode values of arbitrary size, optimizing +for the case where the values are small. Given a 4-bit VBR field, any 3-bit +value (0 through 7) is encoded directly, with the high bit set to zero. Values +larger than N-1 bits emit their bits in a series of N-1 bit chunks, where all +but the last set the high bit. + +For example, the value 27 (0x1B) is encoded as 1011 0011 when emitted as a vbr4 +value. The first set of four bits indicates the value 3 (011) with a +continuation piece (indicated by a high bit of 1). The next word indicates a +value of 24 (011 << 3) with no continuation. The sum (3+24) yields the value +27. + +.. _char6-encoded value: + +6-bit characters +^^^^^^^^^^^^^^^^ + +6-bit characters encode common characters into a fixed 6-bit field. They +represent the following characters with the following 6-bit values: + +:: + + 'a' .. 'z' --- 0 .. 25 + 'A' .. 'Z' --- 26 .. 51 + '0' .. '9' --- 52 .. 61 + '.' --- 62 + '_' --- 63 + +This encoding is only suitable for encoding characters and strings that consist +only of the above characters. It is completely incapable of encoding characters +not in the set. + +Word Alignment +^^^^^^^^^^^^^^ + +Occasionally, it is useful to emit zero bits until the bitstream is a multiple +of 32 bits. This ensures that the bit position in the stream can be represented +as a multiple of 32-bit words. + +Abbreviation IDs +---------------- + +A bitstream is a sequential series of `Blocks`_ and `Data Records`_. Both of +these start with an abbreviation ID encoded as a fixed-bitwidth field. The +width is specified by the current block, as described below. 
The value of the +abbreviation ID specifies either a builtin ID (which have special meanings, +defined below) or one of the abbreviation IDs defined for the current block by +the stream itself. + +The set of builtin abbrev IDs is: + +* 0 - `END_BLOCK`_ --- This abbrev ID marks the end of the current block. + +* 1 - `ENTER_SUBBLOCK`_ --- This abbrev ID marks the beginning of a new + block. + +* 2 - `DEFINE_ABBREV`_ --- This defines a new abbreviation. + +* 3 - `UNABBREV_RECORD`_ --- This ID specifies the definition of an + unabbreviated record. + +Abbreviation IDs 4 and above are defined by the stream itself, and specify an +`abbreviated record encoding`_. + +.. _Blocks: + +Blocks +------ + +Blocks in a bitstream denote nested regions of the stream, and are identified by +a content-specific id number (for example, LLVM IR uses an ID of 12 to represent +function bodies). Block IDs 0-7 are reserved for `standard blocks`_ whose +meaning is defined by Bitcode; block IDs 8 and greater are application +specific. Nested blocks capture the hierarchical structure of the data encoded +in it, and various properties are associated with blocks as the file is parsed. +Block definitions allow the reader to efficiently skip blocks in constant time +if the reader wants a summary of blocks, or if it wants to efficiently skip data +it does not understand. The LLVM IR reader uses this mechanism to skip function +bodies, lazily reading them on demand. + +When reading and encoding the stream, several properties are maintained for the +block. In particular, each block maintains: + +#. A current abbrev id width. This value starts at 2 at the beginning of the + stream, and is set every time a block record is entered. The block entry + specifies the abbrev id width for the body of the block. + +#. A set of abbreviations. Abbreviations may be defined within a block, in + which case they are only defined in that block (neither subblocks nor + enclosing blocks see the abbreviation). 
Abbreviations can also be defined + inside a `BLOCKINFO`_ block, in which case they are defined in all blocks + that match the ID that the ``BLOCKINFO`` block is describing. + +As sub blocks are entered, these properties are saved and the new sub-block has +its own set of abbreviations, and its own abbrev id width. When a sub-block is +popped, the saved values are restored. + +.. _ENTER_SUBBLOCK: + +ENTER_SUBBLOCK Encoding +^^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[ENTER_SUBBLOCK, blockid\ :sub:`vbr8`, newabbrevlen\ :sub:`vbr4`, <align32bits>, blocklen_32] +:raw-html:`</tt>` + +The ``ENTER_SUBBLOCK`` abbreviation ID specifies the start of a new block +record. The ``blockid`` value is encoded as an 8-bit VBR identifier, and +indicates the type of block being entered, which can be a `standard block`_ or +an application-specific block. The ``newabbrevlen`` value is a 4-bit VBR, which +specifies the abbrev id width for the sub-block. The ``blocklen`` value is a +32-bit aligned value that specifies the size of the subblock in 32-bit +words. This value allows the reader to skip over the entire block in one jump. + +.. _END_BLOCK: + +END_BLOCK Encoding +^^^^^^^^^^^^^^^^^^ + +``[END_BLOCK, <align32bits>]`` + +The ``END_BLOCK`` abbreviation ID specifies the end of the current block record. +Its end is aligned to 32-bits to ensure that the size of the block is an even +multiple of 32-bits. + +.. _Data Records: + +Data Records +------------ + +Data records consist of a record code and a number of (up to) 64-bit integer +values. The interpretation of the code and values is application specific and +may vary between different block types. Records can be encoded either using an +unabbrev record, or with an abbreviation. In the LLVM IR format, for example, +there is a record which encodes the target triple of a module. The code is +``MODULE_CODE_TRIPLE``, and the values of the record are the ASCII codes for the +characters in the string. + +.. 
_UNABBREV_RECORD: + +UNABBREV_RECORD Encoding +^^^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[UNABBREV_RECORD, code\ :sub:`vbr6`, numops\ :sub:`vbr6`, op0\ :sub:`vbr6`, op1\ :sub:`vbr6`, ...] +:raw-html:`</tt>` + +An ``UNABBREV_RECORD`` provides a default fallback encoding, which is both +completely general and extremely inefficient.  It can describe an arbitrary +record by emitting the code and operands as VBRs. + +For example, emitting an LLVM IR target triple as an unabbreviated record +requires emitting the ``UNABBREV_RECORD`` abbrevid, a vbr6 for the +``MODULE_CODE_TRIPLE`` code, a vbr6 for the length of the string, which is equal +to the number of operands, and a vbr6 for each character.  Because there are no +letters with values less than 32, each letter would need to be emitted as at +least a two-part VBR, which means that each letter would require at least 12 +bits.  This is not an efficient encoding, but it is fully general. + +.. _abbreviated record encoding: + +Abbreviated Record Encoding +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[<abbrevid>, fields...]`` + +An abbreviated record is an abbreviation id followed by a set of fields that are +encoded according to the `abbreviation definition`_.  This allows records to be +encoded significantly more densely than records encoded with the +`UNABBREV_RECORD`_ type, and allows the abbreviation types to be specified in +the stream itself, which allows the files to be completely self-describing.  The +actual encoding of abbreviations is defined below. + +The record code, which is the first field of an abbreviated record, may be +encoded in the abbreviation definition (as a literal operand) or supplied in the +abbreviated record (as a Fixed or VBR operand value). + +.. _abbreviation definition: + +Abbreviations +------------- + +Abbreviations are an important form of compression for bitstreams.  The idea is +to specify a dense encoding for a class of records once, then use that encoding +to emit many records. 
It takes space to emit the encoding into the file, but +the space is recouped (hopefully plus some) when the records that use it are +emitted. + +Abbreviations can be determined dynamically per client, per file. Because the +abbreviations are stored in the bitstream itself, different streams of the same +format can contain different sets of abbreviations according to the needs of the +specific stream. As a concrete example, LLVM IR files usually emit an +abbreviation for binary operators. If a specific LLVM module contained no or +few binary operators, the abbreviation does not need to be emitted. + +.. _DEFINE_ABBREV: + +DEFINE_ABBREV Encoding +^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[DEFINE_ABBREV, numabbrevops\ :sub:`vbr5`, abbrevop0, abbrevop1, ...] +:raw-html:`</tt>` + +A ``DEFINE_ABBREV`` record adds an abbreviation to the list of currently defined +abbreviations in the scope of this block. This definition only exists inside +this immediate block --- it is not visible in subblocks or enclosing blocks. +Abbreviations are implicitly assigned IDs sequentially starting from 4 (the +first application-defined abbreviation ID). Any abbreviations defined in a +``BLOCKINFO`` record for the particular block type receive IDs first, in order, +followed by any abbreviations defined within the block itself. Abbreviated data +records reference this ID to indicate what abbreviation they are invoking. + +An abbreviation definition consists of the ``DEFINE_ABBREV`` abbrevid followed +by a VBR that specifies the number of abbrev operands, then the abbrev operands +themselves. Abbreviation operands come in three forms. They all start with a +single bit that indicates whether the abbrev operand is a literal operand (when +the bit is 1) or an encoding operand (when the bit is 0). + +#. Literal operands --- :raw-html:`<tt>` [1\ :sub:`1`, litvalue\ + :sub:`vbr8`] :raw-html:`</tt>` --- Literal operands specify that the value in + the result is always a single specific value. 
This specific value is emitted + as a vbr8 after the bit indicating that it is a literal operand. + +#. Encoding info without data --- :raw-html:`<tt>` [0\ :sub:`1`, encoding\ + :sub:`3`] :raw-html:`</tt>` --- Operand encodings that do not have extra data + are just emitted as their code. + +#. Encoding info with data --- :raw-html:`<tt>` [0\ :sub:`1`, encoding\ + :sub:`3`, value\ :sub:`vbr5`] :raw-html:`</tt>` --- Operand encodings that do + have extra data are emitted as their code, followed by the extra data. + +The possible operand encodings are: + +* Fixed (code 1): The field should be emitted as a `fixed-width value`_, whose + width is specified by the operand's extra data. + +* VBR (code 2): The field should be emitted as a `variable-width value`_, whose + width is specified by the operand's extra data. + +* Array (code 3): This field is an array of values. The array operand has no + extra data, but expects another operand to follow it, indicating the element + type of the array. When reading an array in an abbreviated record, the first + integer is a vbr6 that indicates the array length, followed by the encoded + elements of the array. An array may only occur as the last operand of an + abbreviation (except for the one final operand that gives the array's + type). + +* Char6 (code 4): This field should be emitted as a `char6-encoded value`_. + This operand type takes no extra data. Char6 encoding is normally used as an + array element type. + +* Blob (code 5): This field is emitted as a vbr6, followed by padding to a + 32-bit boundary (for alignment) and an array of 8-bit objects. The array of + bytes is further followed by tail padding to ensure that its total length is a + multiple of 4 bytes. This makes it very efficient for the reader to decode + the data without having to make a copy of it: it can use a pointer to the data + in the mapped in file and poke directly at it. A blob may only occur as the + last operand of an abbreviation. 
+ +For example, target triples in LLVM modules are encoded as a record of the form +``[TRIPLE, 'a', 'b', 'c', 'd']``. Consider if the bitstream emitted the +following abbrev entry: + +:: + + [0, Fixed, 4] + [0, Array] + [0, Char6] + +When emitting a record with this abbreviation, the above entry would be emitted +as: + +:raw-html:`<tt><blockquote>` +[4\ :sub:`abbrevwidth`, 2\ :sub:`4`, 4\ :sub:`vbr6`, 0\ :sub:`6`, 1\ :sub:`6`, 2\ :sub:`6`, 3\ :sub:`6`] +:raw-html:`</blockquote></tt>` + +These values are: + +#. The first value, 4, is the abbreviation ID for this abbreviation. + +#. The second value, 2, is the record code for ``TRIPLE`` records within LLVM IR + file ``MODULE_BLOCK`` blocks. + +#. The third value, 4, is the length of the array. + +#. The rest of the values are the char6 encoded values for ``"abcd"``. + +With this abbreviation, the triple is emitted with only 37 bits (assuming a +abbrev id width of 3). Without the abbreviation, significantly more space would +be required to emit the target triple. Also, because the ``TRIPLE`` value is +not emitted as a literal in the abbreviation, the abbreviation can also be used +for any other string value. + +.. _standard blocks: +.. _standard block: + +Standard Blocks +--------------- + +In addition to the basic block structure and record encodings, the bitstream +also defines specific built-in block types. These block types specify how the +stream is to be decoded or other metadata. In the future, new standard blocks +may be added. Block IDs 0-7 are reserved for standard blocks. + +.. _BLOCKINFO: + +#0 - BLOCKINFO Block +^^^^^^^^^^^^^^^^^^^^ + +The ``BLOCKINFO`` block allows the description of metadata for other blocks. +The currently specified records are: + +:: + + [SETBID (#1), blockid] + [DEFINE_ABBREV, ...] + [BLOCKNAME, ...name...] + [SETRECORDNAME, RecordID, ...name...] + +The ``SETBID`` record (code 1) indicates which block ID is being described. 
+``SETBID`` records can occur multiple times throughout the block to change which +block ID is being described. There must be a ``SETBID`` record prior to any +other records. + +Standard ``DEFINE_ABBREV`` records can occur inside ``BLOCKINFO`` blocks, but +unlike their occurrence in normal blocks, the abbreviation is defined for blocks +matching the block ID we are describing, *not* the ``BLOCKINFO`` block +itself. The abbreviations defined in ``BLOCKINFO`` blocks receive abbreviation +IDs as described in `DEFINE_ABBREV`_. + +The ``BLOCKNAME`` record (code 2) can optionally occur in this block. The +elements of the record are the bytes of the string name of the block. +llvm-bcanalyzer can use this to dump out bitcode files symbolically. + +The ``SETRECORDNAME`` record (code 3) can also optionally occur in this block. +The first operand value is a record ID number, and the rest of the elements of +the record are the bytes for the string name of the record. llvm-bcanalyzer can +use this to dump out bitcode files symbolically. + +Note that although the data in ``BLOCKINFO`` blocks is described as "metadata," +the abbreviations they contain are essential for parsing records from the +corresponding blocks. It is not safe to skip them. + +.. _wrapper: + +Bitcode Wrapper Format +====================== + +Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is: + +:raw-html:`<tt><blockquote>` +[Magic\ :sub:`32`, Version\ :sub:`32`, Offset\ :sub:`32`, Size\ :sub:`32`, CPUType\ :sub:`32`] +:raw-html:`</blockquote></tt>` + +Each of the fields are 32-bit fields stored in little endian form (as with the +rest of the bitcode file fields). The Magic number is always ``0x0B17C0DE`` and +the version is currently always ``0``. 
The Offset field is the offset in bytes +to the start of the bitcode stream in the file, and the Size field is the size +in bytes of the stream. CPUType is a target-specific value that can be used to +encode the CPU of the target. + +.. _encoding of LLVM IR: + +LLVM IR Encoding +================ + +LLVM IR is encoded into a bitstream by defining blocks and records. It uses +blocks for things like constant pools, functions, symbol tables, etc. It uses +records for things like instructions, global variable descriptors, type +descriptions, etc. This document does not describe the set of abbreviations +that the writer uses, as these are fully self-described in the file, and the +reader is not allowed to build in any knowledge of this. + +Basics +------ + +LLVM IR Magic Number +^^^^^^^^^^^^^^^^^^^^ + +The magic number for LLVM IR files is: + +:raw-html:`<tt><blockquote>` +[0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`] +:raw-html:`</blockquote></tt>` + +When combined with the bitcode magic number and viewed as bytes, this is +``"BC 0xC0DE"``. + +Signed VBRs +^^^^^^^^^^^ + +`Variable Width Integer`_ encoding is an efficient way to encode arbitrary sized +unsigned values, but is extremely inefficient for encoding signed values, as +signed values are otherwise treated as maximally large unsigned values. + +As such, signed VBR values of a specific width are emitted as follows: + +* Positive values are emitted as VBRs of the specified width, but with their + value shifted left by one. + +* Negative values are emitted as VBRs of the specified width, but the negated + value is shifted left by one, and the low bit is set. + +With this encoding, small positive and small negative values can both be emitted +efficiently. Signed VBR encoding is used in ``CST_CODE_INTEGER`` and +``CST_CODE_WIDE_INTEGER`` records within ``CONSTANTS_BLOCK`` blocks. 
+ +LLVM IR Blocks +^^^^^^^^^^^^^^ + +LLVM IR is defined with the following blocks: + +* 8 --- `MODULE_BLOCK`_ --- This is the top-level block that contains the entire + module, and describes a variety of per-module information. + +* 9 --- `PARAMATTR_BLOCK`_ --- This enumerates the parameter attributes. + +* 10 --- `TYPE_BLOCK`_ --- This describes all of the types in the module. + +* 11 --- `CONSTANTS_BLOCK`_ --- This describes constants for a module or + function. + +* 12 --- `FUNCTION_BLOCK`_ --- This describes a function body. + +* 13 --- `TYPE_SYMTAB_BLOCK`_ --- This describes the type symbol table. + +* 14 --- `VALUE_SYMTAB_BLOCK`_ --- This describes a value symbol table. + +* 15 --- `METADATA_BLOCK`_ --- This describes metadata items. + +* 16 --- `METADATA_ATTACHMENT`_ --- This contains records associating metadata + with function instruction values. + +.. _MODULE_BLOCK: + +MODULE_BLOCK Contents +--------------------- + +The ``MODULE_BLOCK`` block (id 8) is the top-level block for LLVM bitcode files, +and each bitcode file must contain exactly one. In addition to records +(described below) containing information about the module, a ``MODULE_BLOCK`` +block may contain the following sub-blocks: + +* `BLOCKINFO`_ +* `PARAMATTR_BLOCK`_ +* `TYPE_BLOCK`_ +* `TYPE_SYMTAB_BLOCK`_ +* `VALUE_SYMTAB_BLOCK`_ +* `CONSTANTS_BLOCK`_ +* `FUNCTION_BLOCK`_ +* `METADATA_BLOCK`_ + +MODULE_CODE_VERSION Record +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[VERSION, version#]`` + +The ``VERSION`` record (code 1) contains a single value indicating the format +version. Only version 0 is supported at this time. + +MODULE_CODE_TRIPLE Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[TRIPLE, ...string...]`` + +The ``TRIPLE`` record (code 2) contains a variable number of values representing +the bytes of the ``target triple`` specification string. 
+ +MODULE_CODE_DATALAYOUT Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[DATALAYOUT, ...string...]`` + +The ``DATALAYOUT`` record (code 3) contains a variable number of values +representing the bytes of the ``target datalayout`` specification string. + +MODULE_CODE_ASM Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[ASM, ...string...]`` + +The ``ASM`` record (code 4) contains a variable number of values representing +the bytes of ``module asm`` strings, with individual assembly blocks separated +by newline (ASCII 10) characters. + +.. _MODULE_CODE_SECTIONNAME: + +MODULE_CODE_SECTIONNAME Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[SECTIONNAME, ...string...]`` + +The ``SECTIONNAME`` record (code 5) contains a variable number of values +representing the bytes of a single section name string. There should be one +``SECTIONNAME`` record for each section name referenced (e.g., in global +variable or function ``section`` attributes) within the module. These records +can be referenced by the 1-based index in the *section* fields of ``GLOBALVAR`` +or ``FUNCTION`` records. + +MODULE_CODE_DEPLIB Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[DEPLIB, ...string...]`` + +The ``DEPLIB`` record (code 6) contains a variable number of values representing +the bytes of a single dependent library name string, one of the libraries +mentioned in a ``deplibs`` declaration. There should be one ``DEPLIB`` record +for each library name referenced. + +MODULE_CODE_GLOBALVAR Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr]`` + +The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a +global variable. 
The operand fields are: + +* *pointer type*: The type index of the pointer type used to point to this + global variable + +* *isconst*: Non-zero if the variable is treated as constant within the module, + or zero if it is not + +* *initid*: If non-zero, the value index of the initializer for this variable, + plus 1. + +.. _linkage type: + +* *linkage*: An encoding of the linkage type for this variable: + * ``external``: code 0 + * ``weak``: code 1 + * ``appending``: code 2 + * ``internal``: code 3 + * ``linkonce``: code 4 + * ``dllimport``: code 5 + * ``dllexport``: code 6 + * ``extern_weak``: code 7 + * ``common``: code 8 + * ``private``: code 9 + * ``weak_odr``: code 10 + * ``linkonce_odr``: code 11 + * ``available_externally``: code 12 + * ``linker_private``: code 13 + +* *alignment*: The logarithm base 2 of the variable's requested alignment, plus 1 + +* *section*: If non-zero, the 1-based section index in the table of + `MODULE_CODE_SECTIONNAME`_ entries. + +.. _visibility: + +* *visibility*: If present, an encoding of the visibility of this variable: + * ``default``: code 0 + * ``hidden``: code 1 + * ``protected``: code 2 + +* *threadlocal*: If present, an encoding of the thread local storage mode of the + variable: + * ``not thread local``: code 0 + * ``thread local; default TLS model``: code 1 + * ``localdynamic``: code 2 + * ``initialexec``: code 3 + * ``localexec``: code 4 + +* *unnamed_addr*: If present and non-zero, indicates that the variable has + ``unnamed_addr`` + +.. _FUNCTION: + +MODULE_CODE_FUNCTION Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc]`` + +The ``FUNCTION`` record (code 8) marks the declaration or definition of a +function. 
The operand fields are: + +* *type*: The type index of the function type describing this function + +* *callingconv*: The calling convention number: + * ``ccc``: code 0 + * ``fastcc``: code 8 + * ``coldcc``: code 9 + * ``x86_stdcallcc``: code 64 + * ``x86_fastcallcc``: code 65 + * ``arm_apcscc``: code 66 + * ``arm_aapcscc``: code 67 + * ``arm_aapcs_vfpcc``: code 68 + +* *isproto*: Non-zero if this entry represents a declaration rather than a + definition + +* *linkage*: An encoding of the `linkage type`_ for this function + +* *paramattr*: If nonzero, the 1-based parameter attribute index into the table + of `PARAMATTR_CODE_ENTRY`_ entries. + +* *alignment*: The logarithm base 2 of the function's requested alignment, plus + 1 + +* *section*: If non-zero, the 1-based section index in the table of + `MODULE_CODE_SECTIONNAME`_ entries. + +* *visibility*: An encoding of the `visibility`_ of this function + +* *gc*: If present and nonzero, the 1-based garbage collector index in the table + of `MODULE_CODE_GCNAME`_ entries. + +* *unnamed_addr*: If present and non-zero, indicates that the function has + ``unnamed_addr`` + +MODULE_CODE_ALIAS Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[ALIAS, alias type, aliasee val#, linkage, visibility]`` + +The ``ALIAS`` record (code 9) marks the definition of an alias. The operand +fields are + +* *alias type*: The type index of the alias + +* *aliasee val#*: The value index of the aliased value + +* *linkage*: An encoding of the `linkage type`_ for this alias + +* *visibility*: If present, an encoding of the `visibility`_ of the alias + +MODULE_CODE_PURGEVALS Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[PURGEVALS, numvals]`` + +The ``PURGEVALS`` record (code 10) resets the module-level value list to the +size given by the single operand value. Module-level value list items are added +by ``GLOBALVAR``, ``FUNCTION``, and ``ALIAS`` records. After a ``PURGEVALS`` +record is seen, new value indices will start from the given *numvals* value. + +.. 
_MODULE_CODE_GCNAME: + +MODULE_CODE_GCNAME Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[GCNAME, ...string...]`` + +The ``GCNAME`` record (code 11) contains a variable number of values +representing the bytes of a single garbage collector name string. There should +be one ``GCNAME`` record for each garbage collector name referenced in function +``gc`` attributes within the module. These records can be referenced by 1-based +index in the *gc* fields of ``FUNCTION`` records. + +.. _PARAMATTR_BLOCK: + +PARAMATTR_BLOCK Contents +------------------------ + +The ``PARAMATTR_BLOCK`` block (id 9) contains a table of entries describing the +attributes of function parameters. These entries are referenced by 1-based index +in the *paramattr* field of module block `FUNCTION`_ records, or within the +*attr* field of function block ``INST_INVOKE`` and ``INST_CALL`` records. + +Entries within ``PARAMATTR_BLOCK`` are constructed to ensure that each is unique +(i.e., no two indices represent equivalent attribute lists). + +.. _PARAMATTR_CODE_ENTRY: + +PARAMATTR_CODE_ENTRY Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[ENTRY, paramidx0, attr0, paramidx1, attr1...]`` + +The ``ENTRY`` record (code 1) contains an even number of values describing a +unique set of function parameter attributes. Each *paramidx* value indicates +which set of attributes is represented, with 0 representing the return value +attributes, 0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters. 
Each *attr* value is a bitmap with the +following interpretation: + +* bit 0: ``zeroext`` +* bit 1: ``signext`` +* bit 2: ``noreturn`` +* bit 3: ``inreg`` +* bit 4: ``sret`` +* bit 5: ``nounwind`` +* bit 6: ``noalias`` +* bit 7: ``byval`` +* bit 8: ``nest`` +* bit 9: ``readnone`` +* bit 10: ``readonly`` +* bit 11: ``noinline`` +* bit 12: ``alwaysinline`` +* bit 13: ``optsize`` +* bit 14: ``ssp`` +* bit 15: ``sspreq`` +* bits 16-31: ``align n`` +* bit 32: ``nocapture`` +* bit 33: ``noredzone`` +* bit 34: ``noimplicitfloat`` +* bit 35: ``naked`` +* bit 36: ``inlinehint`` +* bits 37-39: ``alignstack n``, represented as the logarithm + base 2 of the requested alignment, plus 1 + +.. _TYPE_BLOCK: + +TYPE_BLOCK Contents +------------------- + +The ``TYPE_BLOCK`` block (id 10) contains records which constitute a table of +type operator entries used to represent types referenced within an LLVM +module. Each record (with the exception of `NUMENTRY`_) generates a single type +table entry, which may be referenced by 0-based index from instructions, +constants, metadata, type symbol table entries, or other type operator records. + +Entries within ``TYPE_BLOCK`` are constructed to ensure that each entry is +unique (i.e., no two indices represent structurally equivalent types). + +.. _TYPE_CODE_NUMENTRY: +.. _NUMENTRY: + +TYPE_CODE_NUMENTRY Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[NUMENTRY, numentries]`` + +The ``NUMENTRY`` record (code 1) contains a single value which indicates the +total number of type code entries in the type table of the module. If present, +``NUMENTRY`` should be the first record in the block. + +TYPE_CODE_VOID Record +^^^^^^^^^^^^^^^^^^^^^ + +``[VOID]`` + +The ``VOID`` record (code 2) adds a ``void`` type to the type table. + +TYPE_CODE_HALF Record +^^^^^^^^^^^^^^^^^^^^^ + +``[HALF]`` + +The ``HALF`` record (code 10) adds a ``half`` (16-bit floating point) type to +the type table. 
+ +TYPE_CODE_FLOAT Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[FLOAT]`` + +The ``FLOAT`` record (code 3) adds a ``float`` (32-bit floating point) type to +the type table. + +TYPE_CODE_DOUBLE Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[DOUBLE]`` + +The ``DOUBLE`` record (code 4) adds a ``double`` (64-bit floating point) type to +the type table. + +TYPE_CODE_LABEL Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[LABEL]`` + +The ``LABEL`` record (code 5) adds a ``label`` type to the type table. + +TYPE_CODE_OPAQUE Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[OPAQUE]`` + +The ``OPAQUE`` record (code 6) adds an ``opaque`` type to the type table. Note +that distinct ``opaque`` types are not unified. + +TYPE_CODE_INTEGER Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[INTEGER, width]`` + +The ``INTEGER`` record (code 7) adds an integer type to the type table. The +single *width* field indicates the width of the integer type. + +TYPE_CODE_POINTER Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[POINTER, pointee type, address space]`` + +The ``POINTER`` record (code 8) adds a pointer type to the type table. The +operand fields are + +* *pointee type*: The type index of the pointed-to type + +* *address space*: If supplied, the target-specific numbered address space where + the pointed-to object resides. Otherwise, the default address space is zero. + +TYPE_CODE_FUNCTION Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[FUNCTION, vararg, ignored, retty, ...paramty... ]`` + +The ``FUNCTION`` record (code 9) adds a function type to the type table. 
The +operand fields are + +* *vararg*: Non-zero if the type represents a varargs function + +* *ignored*: This value field is present for backward compatibility only, and is + ignored + +* *retty*: The type index of the function's return type + +* *paramty*: Zero or more type indices representing the parameter types of the + function + +TYPE_CODE_STRUCT Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[STRUCT, ispacked, ...eltty...]`` + +The ``STRUCT`` record (code 10) adds a struct type to the type table. The +operand fields are + +* *ispacked*: Non-zero if the type represents a packed structure + +* *eltty*: Zero or more type indices representing the element types of the + structure + +TYPE_CODE_ARRAY Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[ARRAY, numelts, eltty]`` + +The ``ARRAY`` record (code 11) adds an array type to the type table. The +operand fields are + +* *numelts*: The number of elements in arrays of this type + +* *eltty*: The type index of the array element type + +TYPE_CODE_VECTOR Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[VECTOR, numelts, eltty]`` + +The ``VECTOR`` record (code 12) adds a vector type to the type table. The +operand fields are + +* *numelts*: The number of elements in vectors of this type + +* *eltty*: The type index of the vector element type + +TYPE_CODE_X86_FP80 Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[X86_FP80]`` + +The ``X86_FP80`` record (code 13) adds an ``x86_fp80`` (80-bit floating point) +type to the type table. + +TYPE_CODE_FP128 Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[FP128]`` + +The ``FP128`` record (code 14) adds an ``fp128`` (128-bit floating point) type +to the type table. + +TYPE_CODE_PPC_FP128 Record +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[PPC_FP128]`` + +The ``PPC_FP128`` record (code 15) adds a ``ppc_fp128`` (128-bit floating point) +type to the type table. + +TYPE_CODE_METADATA Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[METADATA]`` + +The ``METADATA`` record (code 16) adds a ``metadata`` type to the type table. + +.. 
_CONSTANTS_BLOCK: + +CONSTANTS_BLOCK Contents +------------------------ + +The ``CONSTANTS_BLOCK`` block (id 11) ... + +.. _FUNCTION_BLOCK: + +FUNCTION_BLOCK Contents +----------------------- + +The ``FUNCTION_BLOCK`` block (id 12) ... + +In addition to the record types described below, a ``FUNCTION_BLOCK`` block may +contain the following sub-blocks: + +* `CONSTANTS_BLOCK`_ +* `VALUE_SYMTAB_BLOCK`_ +* `METADATA_ATTACHMENT`_ + +.. _TYPE_SYMTAB_BLOCK: + +TYPE_SYMTAB_BLOCK Contents +-------------------------- + +The ``TYPE_SYMTAB_BLOCK`` block (id 13) contains entries which map between +module-level named types and their corresponding type indices. + +.. _TST_CODE_ENTRY: + +TST_CODE_ENTRY Record +^^^^^^^^^^^^^^^^^^^^^ + +``[ENTRY, typeid, ...string...]`` + +The ``ENTRY`` record (code 1) contains a variable number of values, with the +first giving the type index of the designated type, and the remaining values +giving the character codes of the type name. Each entry corresponds to a single +named type. + +.. _VALUE_SYMTAB_BLOCK: + +VALUE_SYMTAB_BLOCK Contents +--------------------------- + +The ``VALUE_SYMTAB_BLOCK`` block (id 14) ... + +.. _METADATA_BLOCK: + +METADATA_BLOCK Contents +----------------------- + +The ``METADATA_BLOCK`` block (id 15) ... + +.. _METADATA_ATTACHMENT: + +METADATA_ATTACHMENT Contents +---------------------------- + +The ``METADATA_ATTACHMENT`` block (id 16) ... 
diff --git a/docs/BranchWeightMetadata.html b/docs/BranchWeightMetadata.html deleted file mode 100644 index 3a0af29043..0000000000 --- a/docs/BranchWeightMetadata.html +++ /dev/null @@ -1,164 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Branch Weight Metadata</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - LLVM Branch Weight Metadata -</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - <li><a href="#supported_instructions">Supported Instructions</a></li> - <li><a href="#builtin_expect">Built-in "expect" Instruction </a></li> - <li><a href="#cfg_modifications">CFG Modifications</a></li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="mailto:jstaszak@apple.com">Jakub Staszak</a></p> -</div> - -<h2> - <a name="introduction">Introduction</a> -</h2> -<div> -<p>Branch Weight Metadata represents branch weights as its likeliness to -be taken. Metadata is assigned to the <tt>TerminatorInst</tt> as a -<tt>MDNode</tt> of the <tt>MD_prof</tt> kind. The first operator is always a -<tt>MDString</tt> node with the string "branch_weights". Number of operators -depends on the terminator type.</p> - -<p>Branch weights might be fetch from the profiling file, or generated based on -<a href="#builtin_expect"><tt>__builtin_expect</tt></a> instruction. -</p> - -<p>All weights are represented as an unsigned 32-bit values, where higher value -indicates greater chance to be taken.</p> -</div> - -<h2> - <a name="supported_instructions">Supported Instructions</a> -</h2> - -<div> - <h4>BranchInst</h4> - <div> - <p>Metadata is only assign to the conditional branches. 
There are two extra - operarands, for the true and the false branch.</p> - </div> - <div class="doc_code"> - <pre> -!0 = metadata !{ - metadata !"branch_weights", - i32 <TRUE_BRANCH_WEIGHT>, - i32 <FALSE_BRANCH_WEIGHT> -} - </pre> - </div> - - <h4>SwitchInst</h4> - <div> - <p>Branch weights are assign to every case (including <tt>default</tt> case - which is always case #0).</p> - </div> - <div class="doc_code"> - <pre> -!0 = metadata !{ - metadata !"branch_weights", - i32 <DEFAULT_BRANCH_WEIGHT> - [ , i32 <CASE_BRANCH_WEIGHT> ... ] -} - </pre> - </div> - - <h4>IndirectBrInst</h4> - <div> - <p>Branch weights are assign to every destination.</p> - </div> - <div class="doc_code"> - <pre> -!0 = metadata !{ - metadata !"branch_weights", - i32 <LABEL_BRANCH_WEIGHT> - [ , i32 <LABEL_BRANCH_WEIGHT> ... ] -} - </pre> - </div> - - <h4>Other</h4> - <div> - <p>Other terminator instructions are not allowed to contain Branch Weight - Metadata.</p> - </div> -</div> - -<h2> - <a name="builtin_expect">Built-in "expect" Instructions</a> -</h2> -<div> - <p><tt>__builtin_expect(long exp, long c)</tt> instruction provides branch - prediction information. The return value is the value of <tt>exp</tt>.</p> - - <p>It is especially useful in conditional statements. Currently Clang supports - two conditional statements: - </p> - <h4><tt>if</tt> statement</h4> - <div> - <p>The <tt>exp</tt> parameter is the condition. The <tt>c</tt> parameter is - the expected comparison value. If it is equal to 1 (true), the condition is - likely to be true, in other case condition is likely to be false. For example: - </p> - </div> - <div class="doc_code"> - <pre> - if (__builtin_expect(x > 0, 1)) { - // This block is likely to be taken. - } - </pre> - </div> - - <h4><tt>switch</tt> statement</h4> - <div> - <p>The <tt>exp</tt> parameter is the value. The <tt>c</tt> parameter is the - expected value. 
If the expected value doesn't show on the cases list, the - <tt>default</tt> case is assumed to be likely taken.</p> - </div> - <div class="doc_code"> - <pre> - switch (__builtin_expect(x, 5)) { - default: break; - case 0: // ... - case 3: // ... - case 5: // This case is likely to be taken. - } - </pre> - </div> -</div> - -<h2> - <a name="cfg_modifications">CFG Modifications</a> -</h2> -<div> -<p>Branch Weight Metatada is not proof against CFG changes. If terminator -operands' are changed some action should be taken. In other case some -misoptimizations may occur due to incorrent branch prediction information.</p> -</div> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:jstaszak@apple.com">Jakub Staszak</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> -</address> - -</body> -</html> diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst new file mode 100644 index 0000000000..f0df971f87 --- /dev/null +++ b/docs/BranchWeightMetadata.rst @@ -0,0 +1,118 @@ +.. _branch_weight: + +=========================== +LLVM Branch Weight Metadata +=========================== + +.. contents:: + :local: + +Introduction +============ + +Branch Weight Metadata represents branch weights as its likeliness to be +taken. Metadata is assigned to the ``TerminatorInst`` as a ``MDNode`` of the +``MD_prof`` kind. The first operator is always a ``MDString`` node with the +string "branch_weights". Number of operators depends on the terminator type. + +Branch weights might be fetch from the profiling file, or generated based on +`__builtin_expect`_ instruction. + +All weights are represented as an unsigned 32-bit values, where higher value +indicates greater chance to be taken. 
+ +Supported Instructions +====================== + +``BranchInst`` +^^^^^^^^^^^^^^ + +Metadata is only assigned to the conditional branches. There are two extra +operands, for the true and the false branch. + +.. code-block:: llvm + + !0 = metadata !{ + metadata !"branch_weights", + i32 <TRUE_BRANCH_WEIGHT>, + i32 <FALSE_BRANCH_WEIGHT> + } + +``SwitchInst`` +^^^^^^^^^^^^^^ + +Branch weights are assigned to every case (including ``default`` case which is +always case #0). + +.. code-block:: llvm + + !0 = metadata !{ + metadata !"branch_weights", + i32 <DEFAULT_BRANCH_WEIGHT> + [ , i32 <CASE_BRANCH_WEIGHT> ... ] + } + +``IndirectBrInst`` +^^^^^^^^^^^^^^^^^^ + +Branch weights are assigned to every destination. + +.. code-block:: llvm + + !0 = metadata !{ + metadata !"branch_weights", + i32 <LABEL_BRANCH_WEIGHT> + [ , i32 <LABEL_BRANCH_WEIGHT> ... ] + } + +Other +^^^^^ + +Other terminator instructions are not allowed to contain Branch Weight Metadata. + +.. _\__builtin_expect: + +Built-in ``expect`` Instructions +================================ + +``__builtin_expect(long exp, long c)`` instruction provides branch prediction +information. The return value is the value of ``exp``. + +It is especially useful in conditional statements. Currently Clang supports two +conditional statements: + +``if`` statement +^^^^^^^^^^^^^^^^ + +The ``exp`` parameter is the condition. The ``c`` parameter is the expected +comparison value. If it is equal to 1 (true), the condition is likely to be +true, in other case condition is likely to be false. For example: + +.. code-block:: c++ + + if (__builtin_expect(x > 0, 1)) { + // This block is likely to be taken. + } + +``switch`` statement +^^^^^^^^^^^^^^^^^^^^ + +The ``exp`` parameter is the value. The ``c`` parameter is the expected +value. If the expected value doesn't show on the cases list, the ``default`` +case is assumed to be likely taken. + +.. code-block:: c++ + + switch (__builtin_expect(x, 5)) { + default: break; + case 0: // ... 
+ case 3: // ... + case 5: // This case is likely to be taken. + } + +CFG Modifications +================= + +Branch Weight Metadata is not proof against CFG changes. If terminator operands +are changed some action should be taken. Otherwise, some misoptimizations may +occur due to incorrect branch prediction information. diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html deleted file mode 100644 index 71f288d772..0000000000 --- a/docs/Bugpoint.html +++ /dev/null @@ -1,316 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM bugpoint tool: design and usage</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> - -<h1> - LLVM bugpoint tool: design and usage -</h1> - -<ul> - <li><a href="#desc">Description</a></li> - <li><a href="#design">Design Philosophy</a> - <ul> - <li><a href="#autoselect">Automatic Debugger Selection</a></li> - <li><a href="#crashdebug">Crash debugger</a></li> - <li><a href="#codegendebug">Code generator debugger</a></li> - <li><a href="#miscompilationdebug">Miscompilation debugger</a></li> - </ul></li> - <li><a href="#advice">Advice for using <tt>bugpoint</tt></a></li> - <li><a href="#notEnough">What to do when <tt>bugpoint</tt> isn't enough</a></li> -</ul> - -<div class="doc_author"> -<p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p> -</div> - -<!-- *********************************************************************** --> -<h2> -<a name="desc">Description</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p><tt>bugpoint</tt> narrows down the source of problems in LLVM tools and -passes. It can be used to debug three types of failures: optimizer crashes, -miscompilations by optimizers, or bad native code generation (including problems -in the static and JIT compilers). 
It aims to reduce large test cases to small, -useful ones. For example, if <tt>opt</tt> crashes while optimizing a -file, it will identify the optimization (or combination of optimizations) that -causes the crash, and reduce the file down to a small example which triggers the -crash.</p> - -<p>For detailed case scenarios, such as debugging <tt>opt</tt>, or one of the -LLVM code generators, see <a href="HowToSubmitABug.html">How To Submit a Bug -Report document</a>.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> -<a name="design">Design Philosophy</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p><tt>bugpoint</tt> is designed to be a useful tool without requiring any -hooks into the LLVM infrastructure at all. It works with any and all LLVM -passes and code generators, and does not need to "know" how they work. Because -of this, it may appear to do stupid things or miss obvious -simplifications. <tt>bugpoint</tt> is also designed to trade off programmer -time for computer time in the compiler-debugging process; consequently, it may -take a long period of (unattended) time to reduce a test case, but we feel it -is still worth it. Note that <tt>bugpoint</tt> is generally very quick unless -debugging a miscompilation where each test of the program (which requires -executing it) takes a long time.</p> - -<!-- ======================================================================= --> -<h3> - <a name="autoselect">Automatic Debugger Selection</a> -</h3> - -<div> - -<p><tt>bugpoint</tt> reads each <tt>.bc</tt> or <tt>.ll</tt> file specified on -the command line and links them together into a single module, called the test -program. If any LLVM passes are specified on the command line, it runs these -passes on the test program. 
If any of the passes crash, or if they produce -malformed output (which causes the verifier to abort), <tt>bugpoint</tt> starts -the <a href="#crashdebug">crash debugger</a>.</p> - -<p>Otherwise, if the <tt>-output</tt> option was not specified, -<tt>bugpoint</tt> runs the test program with the C backend (which is assumed to -generate good code) to generate a reference output. Once <tt>bugpoint</tt> has -a reference output for the test program, it tries executing it with the -selected code generator. If the selected code generator crashes, -<tt>bugpoint</tt> starts the <a href="#crashdebug">crash debugger</a> on the -code generator. Otherwise, if the resulting output differs from the reference -output, it assumes the difference resulted from a code generator failure, and -starts the <a href="#codegendebug">code generator debugger</a>.</p> - -<p>Finally, if the output of the selected code generator matches the reference -output, <tt>bugpoint</tt> runs the test program after all of the LLVM passes -have been applied to it. If its output differs from the reference output, it -assumes the difference resulted from a failure in one of the LLVM passes, and -enters the <a href="#miscompilationdebug">miscompilation debugger</a>. -Otherwise, there is no problem <tt>bugpoint</tt> can debug.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="crashdebug">Crash debugger</a> -</h3> - -<div> - -<p>If an optimizer or code generator crashes, <tt>bugpoint</tt> will try as hard -as it can to reduce the list of passes (for optimizer crashes) and the size of -the test program. First, <tt>bugpoint</tt> figures out which combination of -optimizer passes triggers the bug. This is useful when debugging a problem -exposed by <tt>opt</tt>, for example, because it runs over 38 passes.</p> - -<p>Next, <tt>bugpoint</tt> tries removing functions from the test program, to -reduce its size. 
Usually it is able to reduce a test program to a single -function, when debugging intraprocedural optimizations. Once the number of -functions has been reduced, it attempts to delete various edges in the control -flow graph, to reduce the size of the function as much as possible. Finally, -<tt>bugpoint</tt> deletes any individual LLVM instructions whose absence does -not eliminate the failure. At the end, <tt>bugpoint</tt> should tell you what -passes crash, give you a bitcode file, and give you instructions on how to -reproduce the failure with <tt>opt</tt> or <tt>llc</tt>.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="codegendebug">Code generator debugger</a> -</h3> - -<div> - -<p>The code generator debugger attempts to narrow down the amount of code that -is being miscompiled by the selected code generator. To do this, it takes the -test program and partitions it into two pieces: one piece which it compiles -with the C backend (into a shared object), and one piece which it runs with -either the JIT or the static LLC compiler. It uses several techniques to -reduce the amount of code pushed through the LLVM code generator, to reduce the -potential scope of the problem. After it is finished, it emits two bitcode -files (called "test" [to be compiled with the code generator] and "safe" [to be -compiled with the C backend], respectively), and instructions for reproducing -the problem. The code generator debugger assumes that the C backend produces -good code.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="miscompilationdebug">Miscompilation debugger</a> -</h3> - -<div> - -<p>The miscompilation debugger works similarly to the code generator debugger. -It works by splitting the test program into two pieces, running the -optimizations specified on one piece, linking the two pieces back together, and -then executing the result. 
It attempts to narrow down the list of passes to -the one (or few) which are causing the miscompilation, then reduce the portion -of the test program which is being miscompiled. The miscompilation debugger -assumes that the selected code generator is working properly.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="advice">Advice for using bugpoint</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<tt>bugpoint</tt> can be a remarkably useful tool, but it sometimes works in -non-obvious ways. Here are some hints and tips:<p> - -<ol> -<li>In the code generator and miscompilation debuggers, <tt>bugpoint</tt> only - works with programs that have deterministic output. Thus, if the program - outputs <tt>argv[0]</tt>, the date, time, or any other "random" data, - <tt>bugpoint</tt> may misinterpret differences in these data, when output, - as the result of a miscompilation. Programs should be temporarily modified - to disable outputs that are likely to vary from run to run. - -<li>In the code generator and miscompilation debuggers, debugging will go - faster if you manually modify the program or its inputs to reduce the - runtime, but still exhibit the problem. - -<li><tt>bugpoint</tt> is extremely useful when working on a new optimization: - it helps track down regressions quickly. To avoid having to relink - <tt>bugpoint</tt> every time you change your optimization however, have - <tt>bugpoint</tt> dynamically load your optimization with the - <tt>-load</tt> option. - -<li><p><tt>bugpoint</tt> can generate a lot of output and run for a long period - of time. It is often useful to capture the output of the program to file. - For example, in the C shell, you can run:</p> - -<div class="doc_code"> -<p><tt>bugpoint ... 
|& tee bugpoint.log</tt></p> -</div> - - <p>to get a copy of <tt>bugpoint</tt>'s output in the file - <tt>bugpoint.log</tt>, as well as on your terminal.</p> - -<li><tt>bugpoint</tt> cannot debug problems with the LLVM linker. If - <tt>bugpoint</tt> crashes before you see its "All input ok" message, - you might try <tt>llvm-link -v</tt> on the same set of input files. If - that also crashes, you may be experiencing a linker bug. - -<li><tt>bugpoint</tt> is useful for proactively finding bugs in LLVM. - Invoking <tt>bugpoint</tt> with the <tt>-find-bugs</tt> option will cause - the list of specified optimizations to be randomized and applied to the - program. This process will repeat until a bug is found or the user - kills <tt>bugpoint</tt>. -</ol> - -</div> -<!-- *********************************************************************** --> -<h2> - <a name="notEnough">What to do when bugpoint isn't enough</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Sometimes, <tt>bugpoint</tt> is not enough. In particular, InstCombine and -TargetLowering both have visitor structured code with lots of potential -transformations. If the process of using bugpoint has left you with -still too much code to figure out and the problem seems -to be in instcombine, the following steps may help. These same techniques -are useful with TargetLowering as well.</p> - -<p>Turn on <tt>-debug-only=instcombine</tt> and see which transformations -within instcombine are firing by selecting out lines with "<tt>IC</tt>" -in them.</p> - -<p>At this point, you have a decision to make. Is the number -of transformations small enough to step through them using a debugger? -If so, then try that.</p> - -<p>If there are too many transformations, then a source modification -approach may be helpful. 
-In this approach, you can modify the source code of instcombine -to disable just those transformations that are being performed on your -test input and perform a binary search over the set of transformations. -One set of places to modify are the "<tt>visit*</tt>" methods of -<tt>InstCombiner</tt> (<I>e.g.</I> <tt>visitICmpInst</tt>) by adding a -"<tt>return false</tt>" as the first line of the method.</p> - -<p>If that still doesn't remove enough, then change the caller of -<tt>InstCombiner::DoOneIteration</tt>, <tt>InstCombiner::runOnFunction</tt> -to limit the number of iterations.</p> - -<p>You may also find it useful to use "<tt>-stats</tt>" now to see what parts -of instcombine are firing. This can guide where to put additional reporting -code.</p> - -<p>At this point, if the amount of transformations is still too large, then -inserting code to limit whether or not to execute the body of the code -in the visit function can be helpful. Add a static counter which is -incremented on every invocation of the function. Then add code which -simply returns false on desired ranges. For example:</p> - -<div class="doc_code"> -<p><tt>static int calledCount = 0;</tt></p> -<p><tt>calledCount++;</tt></p> -<p><tt>DEBUG(if (calledCount < 212) return false);</tt></p> -<p><tt>DEBUG(if (calledCount > 217) return false);</tt></p> -<p><tt>DEBUG(if (calledCount == 213) return false);</tt></p> -<p><tt>DEBUG(if (calledCount == 214) return false);</tt></p> -<p><tt>DEBUG(if (calledCount == 215) return false);</tt></p> -<p><tt>DEBUG(if (calledCount == 216) return false);</tt></p> -<p><tt>DEBUG(dbgs() << "visitXOR calledCount: " << calledCount - << "\n");</tt></p> -<p><tt>DEBUG(dbgs() << "I: "; I->dump());</tt></p> -</div> - -<p>could be added to <tt>visitXOR</tt> to limit <tt>visitXor</tt> to being -applied only to calls 212 and 217. 
This is from an actual test case and raises -an important point---a simple binary search may not be sufficient, as -transformations that interact may require isolating more than one call. -In TargetLowering, use <tt>return SDNode();</tt> instead of -<tt>return false;</tt>.</p> - -<p>Now that that the number of transformations is down to a manageable -number, try examining the output to see if you can figure out which -transformations are being done. If that can be figured out, then -do the usual debugging. If which code corresponds to the transformation -being performed isn't obvious, set a breakpoint after the call count -based disabling and step through the code. Alternatively, you can use -"printf" style debugging to report waypoints.</p> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/Bugpoint.rst b/docs/Bugpoint.rst new file mode 100644 index 0000000000..9ccf0cc2d9 --- /dev/null +++ b/docs/Bugpoint.rst @@ -0,0 +1,218 @@ +.. _bugpoint: + +==================================== +LLVM bugpoint tool: design and usage +==================================== + +.. contents:: + :local: + +Description +=========== + +``bugpoint`` narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. 
+For example, if ``opt`` crashes while optimizing a file, it will identify the +optimization (or combination of optimizations) that causes the crash, and reduce +the file down to a small example which triggers the crash. + +For detailed case scenarios, such as debugging ``opt``, or one of the LLVM code +generators, see `How To Submit a Bug Report document <HowToSubmitABug.html>`_. + +Design Philosophy +================= + +``bugpoint`` is designed to be a useful tool without requiring any hooks into +the LLVM infrastructure at all. It works with any and all LLVM passes and code +generators, and does not need to "know" how they work. Because of this, it may +appear to do stupid things or miss obvious simplifications. ``bugpoint`` is +also designed to trade off programmer time for computer time in the +compiler-debugging process; consequently, it may take a long period of +(unattended) time to reduce a test case, but we feel it is still worth it. Note +that ``bugpoint`` is generally very quick unless debugging a miscompilation +where each test of the program (which requires executing it) takes a long time. + +Automatic Debugger Selection +---------------------------- + +``bugpoint`` reads each ``.bc`` or ``.ll`` file specified on the command line +and links them together into a single module, called the test program. If any +LLVM passes are specified on the command line, it runs these passes on the test +program. If any of the passes crash, or if they produce malformed output (which +causes the verifier to abort), ``bugpoint`` starts the `crash debugger`_. + +Otherwise, if the ``-output`` option was not specified, ``bugpoint`` runs the +test program with the "safe" backend (which is assumed to generate good code) to +generate a reference output. Once ``bugpoint`` has a reference output for the +test program, it tries executing it with the selected code generator. 
If the +selected code generator crashes, ``bugpoint`` starts the `crash debugger`_ on +the code generator. Otherwise, if the resulting output differs from the +reference output, it assumes the difference resulted from a code generator +failure, and starts the `code generator debugger`_. + +Finally, if the output of the selected code generator matches the reference +output, ``bugpoint`` runs the test program after all of the LLVM passes have +been applied to it. If its output differs from the reference output, it assumes +the difference resulted from a failure in one of the LLVM passes, and enters the +`miscompilation debugger`_. Otherwise, there is no problem ``bugpoint`` can +debug. + +.. _crash debugger: + +Crash debugger +-------------- + +If an optimizer or code generator crashes, ``bugpoint`` will try as hard as it +can to reduce the list of passes (for optimizer crashes) and the size of the +test program. First, ``bugpoint`` figures out which combination of optimizer +passes triggers the bug. This is useful when debugging a problem exposed by +``opt``, for example, because it runs over 38 passes. + +Next, ``bugpoint`` tries removing functions from the test program, to reduce its +size. Usually it is able to reduce a test program to a single function, when +debugging intraprocedural optimizations. Once the number of functions has been +reduced, it attempts to delete various edges in the control flow graph, to +reduce the size of the function as much as possible. Finally, ``bugpoint`` +deletes any individual LLVM instructions whose absence does not eliminate the +failure. At the end, ``bugpoint`` should tell you what passes crash, give you a +bitcode file, and give you instructions on how to reproduce the failure with +``opt`` or ``llc``. + +.. _code generator debugger: + +Code generator debugger +----------------------- + +The code generator debugger attempts to narrow down the amount of code that is +being miscompiled by the selected code generator. 
To do this, it takes the test +program and partitions it into two pieces: one piece which it compiles with the +"safe" backend (into a shared object), and one piece which it runs with either +the JIT or the static LLC compiler. It uses several techniques to reduce the +amount of code pushed through the LLVM code generator, to reduce the potential +scope of the problem. After it is finished, it emits two bitcode files (called +"test" [to be compiled with the code generator] and "safe" [to be compiled with +the "safe" backend], respectively), and instructions for reproducing the +problem. The code generator debugger assumes that the "safe" backend produces +good code. + +.. _miscompilation debugger: + +Miscompilation debugger +----------------------- + +The miscompilation debugger works similarly to the code generator debugger. It +works by splitting the test program into two pieces, running the optimizations +specified on one piece, linking the two pieces back together, and then executing +the result. It attempts to narrow down the list of passes to the one (or few) +which are causing the miscompilation, then reduce the portion of the test +program which is being miscompiled. The miscompilation debugger assumes that +the selected code generator is working properly. + +Advice for using bugpoint +========================= + +``bugpoint`` can be a remarkably useful tool, but it sometimes works in +non-obvious ways. Here are some hints and tips: + +* In the code generator and miscompilation debuggers, ``bugpoint`` only works + with programs that have deterministic output. Thus, if the program outputs + ``argv[0]``, the date, time, or any other "random" data, ``bugpoint`` may + misinterpret differences in these data, when output, as the result of a + miscompilation. Programs should be temporarily modified to disable outputs + that are likely to vary from run to run. 
+ +* In the code generator and miscompilation debuggers, debugging will go faster + if you manually modify the program or its inputs to reduce the runtime, but + still exhibit the problem. + +* ``bugpoint`` is extremely useful when working on a new optimization: it helps + track down regressions quickly. To avoid having to relink ``bugpoint`` every + time you change your optimization however, have ``bugpoint`` dynamically load + your optimization with the ``-load`` option. + +* ``bugpoint`` can generate a lot of output and run for a long period of time. + It is often useful to capture the output of the program to file. For example, + in the C shell, you can run: + + .. code-block:: bash + + bugpoint ... |& tee bugpoint.log + + to get a copy of ``bugpoint``'s output in the file ``bugpoint.log``, as well + as on your terminal. + +* ``bugpoint`` cannot debug problems with the LLVM linker. If ``bugpoint`` + crashes before you see its "All input ok" message, you might try ``llvm-link + -v`` on the same set of input files. If that also crashes, you may be + experiencing a linker bug. + +* ``bugpoint`` is useful for proactively finding bugs in LLVM. Invoking + ``bugpoint`` with the ``-find-bugs`` option will cause the list of specified + optimizations to be randomized and applied to the program. This process will + repeat until a bug is found or the user kills ``bugpoint``. + +What to do when bugpoint isn't enough +===================================== + +Sometimes, ``bugpoint`` is not enough. In particular, InstCombine and +TargetLowering both have visitor structured code with lots of potential +transformations. If the process of using bugpoint has left you with still too +much code to figure out and the problem seems to be in instcombine, the +following steps may help. These same techniques are useful with TargetLowering +as well. 
+ +Turn on ``-debug-only=instcombine`` and see which transformations within +instcombine are firing by selecting out lines with "``IC``" in them. + +At this point, you have a decision to make. Is the number of transformations +small enough to step through them using a debugger? If so, then try that. + +If there are too many transformations, then a source modification approach may +be helpful. In this approach, you can modify the source code of instcombine to +disable just those transformations that are being performed on your test input +and perform a binary search over the set of transformations. One set of places +to modify are the "``visit*``" methods of ``InstCombiner`` (*e.g.* +``visitICmpInst``) by adding a "``return false``" as the first line of the +method. + +If that still doesn't remove enough, then change the caller of +``InstCombiner::DoOneIteration``, ``InstCombiner::runOnFunction`` to limit the +number of iterations. + +You may also find it useful to use "``-stats``" now to see what parts of +instcombine are firing. This can guide where to put additional reporting code. + +At this point, if the amount of transformations is still too large, then +inserting code to limit whether or not to execute the body of the code in the +visit function can be helpful. Add a static counter which is incremented on +every invocation of the function. Then add code which simply returns false on +desired ranges. For example: + +.. 
code-block:: c++ + + + static int calledCount = 0; + calledCount++; + DEBUG(if (calledCount < 212) return false); + DEBUG(if (calledCount > 217) return false); + DEBUG(if (calledCount == 213) return false); + DEBUG(if (calledCount == 214) return false); + DEBUG(if (calledCount == 215) return false); + DEBUG(if (calledCount == 216) return false); + DEBUG(dbgs() << "visitXOR calledCount: " << calledCount << "\n"); + DEBUG(dbgs() << "I: "; I->dump()); + +could be added to ``visitXOR`` to limit ``visitXor`` to being applied only to +calls 212 and 217. This is from an actual test case and raises an important +point---a simple binary search may not be sufficient, as transformations that +interact may require isolating more than one call. In TargetLowering, use +``return SDNode();`` instead of ``return false;``. + +Now that that the number of transformations is down to a manageable number, try +examining the output to see if you can figure out which transformations are +being done. If that can be figured out, then do the usual debugging. If which +code corresponds to the transformation being performed isn't obvious, set a +breakpoint after the call count based disabling and step through the code. +Alternatively, you can use "``printf``" style debugging to report waypoints. diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html index 672dc294a7..651eb96603 100644 --- a/docs/CodeGenerator.html +++ b/docs/CodeGenerator.html @@ -218,7 +218,8 @@ support completely non-traditional code generation targets. For example, the C backend does not require register allocation, instruction selection, or any of the other standard components provided by the system. As such, it only - implements these two interfaces, and does its own thing. Another example of + implements these two interfaces, and does its own thing. Note that C backend + was removed from the trunk since LLVM 3.1 release. 
Another example of a code generator like this is a (purely hypothetical) backend that converts LLVM to the GCC RTL form and uses GCC to emit machine code for a target.</p> diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html deleted file mode 100644 index f92c20baa2..0000000000 --- a/docs/CodingStandards.html +++ /dev/null @@ -1,1568 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> - <title>LLVM Coding Standards</title> -</head> -<body> - -<h1> - LLVM Coding Standards -</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - <li><a href="#mechanicalissues">Mechanical Source Issues</a> - <ol> - <li><a href="#sourceformating">Source Code Formatting</a> - <ol> - <li><a href="#scf_commenting">Commenting</a></li> - <li><a href="#scf_commentformat">Comment Formatting</a></li> - <li><a href="#scf_includes"><tt>#include</tt> Style</a></li> - <li><a href="#scf_codewidth">Source Code Width</a></li> - <li><a href="#scf_spacestabs">Use Spaces Instead of Tabs</a></li> - <li><a href="#scf_indentation">Indent Code Consistently</a></li> - </ol></li> - <li><a href="#compilerissues">Compiler Issues</a> - <ol> - <li><a href="#ci_warningerrors">Treat Compiler Warnings Like - Errors</a></li> - <li><a href="#ci_portable_code">Write Portable Code</a></li> - <li><a href="#ci_rtti_exceptions">Do not use RTTI or Exceptions</a></li> - <li><a href="#ci_static_ctors">Do not use Static Constructors</a></li> - <li><a href="#ci_class_struct">Use of <tt>class</tt>/<tt>struct</tt> Keywords</a></li> - </ol></li> - </ol></li> - <li><a href="#styleissues">Style Issues</a> - <ol> - <li><a href="#macro">The High-Level Issues</a> - <ol> - <li><a href="#hl_module">A Public Header File <b>is</b> a - Module</a></li> - <li><a href="#hl_dontinclude"><tt>#include</tt> as Little as 
Possible</a></li> - <li><a href="#hl_privateheaders">Keep "internal" Headers - Private</a></li> - <li><a href="#hl_earlyexit">Use Early Exits and <tt>continue</tt> to Simplify - Code</a></li> - <li><a href="#hl_else_after_return">Don't use <tt>else</tt> after a - <tt>return</tt></a></li> - <li><a href="#hl_predicateloops">Turn Predicate Loops into Predicate - Functions</a></li> - </ol></li> - <li><a href="#micro">The Low-Level Issues</a> - <ol> - <li><a href="#ll_naming">Name Types, Functions, Variables, and Enumerators Properly</a></li> - <li><a href="#ll_assert">Assert Liberally</a></li> - <li><a href="#ll_ns_std">Do not use '<tt>using namespace std</tt>'</a></li> - <li><a href="#ll_virtual_anch">Provide a virtual method anchor for - classes in headers</a></li> - <li><a href="#ll_end">Don't evaluate <tt>end()</tt> every time through a - loop</a></li> - <li><a href="#ll_iostream"><tt>#include <iostream></tt> is - <em>forbidden</em></a></li> - <li><a href="#ll_raw_ostream">Use <tt>raw_ostream</tt></a></li> - <li><a href="#ll_avoidendl">Avoid <tt>std::endl</tt></a></li> - </ol></li> - - <li><a href="#nano">Microscopic Details</a> - <ol> - <li><a href="#micro_spaceparen">Spaces Before Parentheses</a></li> - <li><a href="#micro_preincrement">Prefer Preincrement</a></li> - <li><a href="#micro_namespaceindent">Namespace Indentation</a></li> - <li><a href="#micro_anonns">Anonymous Namespaces</a></li> - </ol></li> - - - </ol></li> - <li><a href="#seealso">See Also</a></li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p> -</div> - - -<!-- *********************************************************************** --> -<h2><a name="introduction">Introduction</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>This document attempts to describe a few coding standards that are being used -in the LLVM source tree. 
Although no coding standards should be regarded as -absolute requirements to be followed in all instances, coding standards are -particularly important for large-scale code bases that follow a library-based -design (like LLVM).</p> - -<p>This document intentionally does not prescribe fixed standards for religious -issues such as brace placement and space usage. For issues like this, follow -the golden rule:</p> - -<blockquote> - -<p><b><a name="goldenrule">If you are extending, enhancing, or bug fixing -already implemented code, use the style that is already being used so that the -source is uniform and easy to follow.</a></b></p> - -</blockquote> - -<p>Note that some code bases (e.g. libc++) have really good reasons to deviate -from the coding standards. In the case of libc++, this is because the naming -and other conventions are dictated by the C++ standard. If you think there is -a specific good reason to deviate from the standards here, please bring it up -on the LLVMdev mailing list.</p> - -<p>There are some conventions that are not uniformly followed in the code base -(e.g. the naming convention). This is because they are relatively new, and a -lot of code was written before they were put in place. Our long term goal is -for the entire codebase to follow the convention, but we explicitly <em>do -not</em> want patches that do large-scale reformating of existing code. OTOH, -it is reasonable to rename the methods of a class if you're about to change it -in some other way. Just do the reformating as a separate commit from the -functionality change. </p> - -<p>The ultimate goal of these guidelines is the increase readability and -maintainability of our common source base. 
If you have suggestions for topics to -be included, please mail them to <a -href="mailto:sabre@nondot.org">Chris</a>.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="mechanicalissues">Mechanical Source Issues</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<!-- ======================================================================= --> -<h3> - <a name="sourceformating">Source Code Formatting</a> -</h3> - -<div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_commenting">Commenting</a> -</h4> - -<div> - -<p>Comments are one critical part of readability and maintainability. Everyone -knows they should comment their code, and so should you. When writing comments, -write them as English prose, which means they should use proper capitalization, -punctuation, etc. Aim to describe what a code is trying to do and why, not -"how" it does it at a micro level. Here are a few critical things to -document:</p> - -<h5>File Headers</h5> - -<div> - -<p>Every source file should have a header on it that describes the basic -purpose of the file. If a file does not have a header, it should not be -checked into the tree. The standard header looks like this:</p> - -<div class="doc_code"> -<pre> -//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the Instruction class, which is the -// base class for all of the VM instructions. 
-//
-//===----------------------------------------------------------------------===//
-</pre>
-</div>
-
-<p>A few things to note about this particular format: The "<tt>-*- C++
--*-</tt>" string on the first line is there to tell Emacs that the source file
-is a C++ file, not a C file (Emacs assumes <tt>.h</tt> files are C files by default).
-Note that this tag is not necessary in <tt>.cpp</tt> files. The name of the file is also
-on the first line, along with a very short description of the purpose of the
-file. This is important when printing out code and flipping through lots of
-pages.</p>
-
-<p>The next section in the file is a concise note that defines the license
-that the file is released under. This makes it perfectly clear what terms the
-source code can be distributed under and should not be modified in any way.</p>
-
-<p>The main body of the description does not have to be very long in most cases.
-Here it's only two lines. If an algorithm is being implemented or something
-tricky is going on, a reference to the paper where it is published should be
-included, as well as any notes or "gotchas" in the code to watch out for.</p>
-
-</div>
-
-<h5>Class overviews</h5>
-
-<p>Classes are one fundamental part of a good object oriented design. As such,
-a class definition should have a comment block that explains what the class is
-used for and how it works. Every non-trivial class is expected to have a
-doxygen comment block.</p>
-
-
-<h5>Method information</h5>
-
-<div>
-
-<p>Methods defined in a class (as well as any global functions) should also be
-documented properly. A quick note about what it does and a description of the
-borderline behaviour is all that is necessary here (unless something
-particularly tricky or insidious is going on). 
The hope is that people can -figure out how to use your interfaces without reading the code itself.</p> - -<p>Good things to talk about here are what happens when something unexpected -happens: does the method return null? Abort? Format your hard disk?</p> - -</div> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_commentformat">Comment Formatting</a> -</h4> - -<div> - -<p>In general, prefer C++ style (<tt>//</tt>) comments. They take less space, -require less typing, don't have nesting problems, etc. There are a few cases -when it is useful to use C style (<tt>/* */</tt>) comments however:</p> - -<ol> - <li>When writing C code: Obviously if you are writing C code, use C style - comments.</li> - <li>When writing a header file that may be <tt>#include</tt>d by a C source - file.</li> - <li>When writing a source file that is used by a tool that only accepts C - style comments.</li> -</ol> - -<p>To comment out a large block of code, use <tt>#if 0</tt> and <tt>#endif</tt>. -These nest properly and are better behaved in general than C style comments.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_includes"><tt>#include</tt> Style</a> -</h4> - -<div> - -<p>Immediately after the <a href="#scf_commenting">header file comment</a> (and -include guards if working on a header file), the <a -href="#hl_dontinclude">minimal</a> list of <tt>#include</tt>s required by the -file should be listed. 
We prefer these <tt>#include</tt>s to be listed in this -order:</p> - -<ol> - <li><a href="#mmheader">Main Module Header</a></li> - <li><a href="#hl_privateheaders">Local/Private Headers</a></li> - <li><tt>llvm/*</tt></li> - <li><tt>llvm/Analysis/*</tt></li> - <li><tt>llvm/Assembly/*</tt></li> - <li><tt>llvm/Bitcode/*</tt></li> - <li><tt>llvm/CodeGen/*</tt></li> - <li>...</li> - <li><tt>Support/*</tt></li> - <li><tt>Config/*</tt></li> - <li>System <tt>#includes</tt></li> -</ol> - -<p>and each category should be sorted by name.</p> - -<p><a name="mmheader">The "Main Module Header"</a> file applies to <tt>.cpp</tt> files -which implement an interface defined by a <tt>.h</tt> file. This <tt>#include</tt> -should always be included <b>first</b> regardless of where it lives on the file -system. By including a header file first in the <tt>.cpp</tt> files that implement the -interfaces, we ensure that the header does not have any hidden dependencies -which are not explicitly #included in the header, but should be. It is also a -form of documentation in the <tt>.cpp</tt> file to indicate where the interfaces it -implements are defined.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_codewidth">Source Code Width</a> -</h4> - -<div> - -<p>Write your code to fit within 80 columns of text. This helps those of us who -like to print out code and look at your code in an xterm without resizing -it.</p> - -<p>The longer answer is that there must be some limit to the width of the code -in order to reasonably allow developers to have multiple files side-by-side in -windows on a modest display. If you are going to pick a width limit, it is -somewhat arbitrary but you might as well pick something standard. Going with -90 columns (for example) instead of 80 columns wouldn't add any significant -value and would be detrimental to printing out code. 
Also many other projects -have standardized on 80 columns, so some people have already configured their -editors for it (vs something else, like 90 columns).</p> - -<p>This is one of many contentious issues in coding standards, but it is not up -for debate.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_spacestabs">Use Spaces Instead of Tabs</a> -</h4> - -<div> - -<p>In all cases, prefer spaces to tabs in source files. People have different -preferred indentation levels, and different styles of indentation that they -like; this is fine. What isn't fine is that different editors/viewers expand -tabs out to different tab stops. This can cause your code to look completely -unreadable, and it is not worth dealing with.</p> - -<p>As always, follow the <a href="#goldenrule">Golden Rule</a> above: follow the -style of existing code if you are modifying and extending it. If you like four -spaces of indentation, <b>DO NOT</b> do that in the middle of a chunk of code -with two spaces of indentation. Also, do not reindent a whole source file: it -makes for incredible diffs that are absolutely worthless.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="scf_indentation">Indent Code Consistently</a> -</h4> - -<div> - -<p>Okay, in your first year of programming you were told that indentation is -important. If you didn't believe and internalize this then, now is the time. 
-Just do it.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="compilerissues">Compiler Issues</a>
-</h3>
-
-<div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="ci_warningerrors">Treat Compiler Warnings Like Errors</a>
-</h4>
-
-<div>
-
-<p>If your code has compiler warnings in it, something is wrong — you
-aren't casting values correctly, you have "questionable" constructs in your
-code, or you are doing something legitimately wrong. Compiler warnings can
-cover up legitimate errors in output and make dealing with a translation unit
-difficult.</p>
-
-<p>It is not possible to prevent all warnings from all compilers, nor is it
-desirable. Instead, pick a standard compiler (like <tt>gcc</tt>) that provides
-a good thorough set of warnings, and stick to it. At least in the case of
-<tt>gcc</tt>, it is possible to work around any spurious errors by changing the
-syntax of the code slightly. For example, a warning that annoys me occurs when
-I write code like this:</p>
-
-<div class="doc_code">
-<pre>
-if (V = getValue()) {
-  ...
-}
-</pre>
-</div>
-
-<p><tt>gcc</tt> will warn me that I probably want to use the <tt>==</tt>
-operator, and that I probably mistyped it. In most cases, I haven't, and I
-really don't want the spurious errors. To fix this particular problem, I
-rewrite the code like this:</p>
-
-<div class="doc_code">
-<pre>
-if ((V = getValue())) {
-  ...
-}
-</pre>
-</div>
-
-<p>which shuts <tt>gcc</tt> up. Any <tt>gcc</tt> warning that annoys you can
-be fixed by massaging the code appropriately.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="ci_portable_code">Write Portable Code</a>
-</h4>
-
-<div>
-
-<p>In almost all cases, it is possible and within reason to write completely
-portable code. 
If there are cases where it isn't possible to write portable -code, isolate it behind a well defined (and well documented) interface.</p> - -<p>In practice, this means that you shouldn't assume much about the host -compiler, and Visual Studio tends to be the lowest common denominator. -If advanced features are used, they should only be an implementation detail of -a library which has a simple exposed API, and preferably be buried in -libSystem.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> -<a name="ci_rtti_exceptions">Do not use RTTI or Exceptions</a> -</h4> -<div> - -<p>In an effort to reduce code and executable size, LLVM does not use RTTI -(e.g. <tt>dynamic_cast<></tt>) or exceptions. These two language features -violate the general C++ principle of <i>"you only pay for what you use"</i>, -causing executable bloat even if exceptions are never used in the code base, or -if RTTI is never used for a class. Because of this, we turn them off globally -in the code.</p> - -<p>That said, LLVM does make extensive use of a hand-rolled form of RTTI that -use templates like <a href="ProgrammersManual.html#isa"><tt>isa<></tt>, -<tt>cast<></tt>, and <tt>dyn_cast<></tt></a>. This form of RTTI is -opt-in and can be added to any class. It is also substantially more efficient -than <tt>dynamic_cast<></tt>.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> -<a name="ci_static_ctors">Do not use Static Constructors</a> -</h4> -<div> - -<p>Static constructors and destructors (e.g. global variables whose types have -a constructor or destructor) should not be added to the code base, and should be -removed wherever possible. 
Besides <a -href="http://yosefk.com/c++fqa/ctors.html#fqa-10.12">well known problems</a> -where the order of initialization is undefined between globals in different -source files, the entire concept of static constructors is at odds with the -common use case of LLVM as a library linked into a larger application.</p> - -<p>Consider the use of LLVM as a JIT linked into another application (perhaps -for <a href="http://llvm.org/Users.html">OpenGL, custom languages</a>, -<a href="http://llvm.org/devmtg/2010-11/Gritz-OpenShadingLang.pdf">shaders in -movies</a>, etc). Due to the design of static constructors, they must be -executed at startup time of the entire application, regardless of whether or -how LLVM is used in that larger application. There are two problems with -this:</p> - -<ol> - <li>The time to run the static constructors impacts startup time of - applications — a critical time for GUI apps, among others.</li> - - <li>The static constructors cause the app to pull many extra pages of memory - off the disk: both the code for the constructor in each <tt>.o</tt> file and - the small amount of data that gets touched. In addition, touched/dirty pages - put more pressure on the VM system on low-memory machines.</li> -</ol> - -<p>We would really like for there to be zero cost for linking in an additional -LLVM target or other library into an application, but static constructors -violate this goal.</p> - -<p>That said, LLVM unfortunately does contain static constructors. It would be -a <a href="http://llvm.org/PR11944">great project</a> for someone to purge all -static constructors from LLVM, and then enable the -<tt>-Wglobal-constructors</tt> warning flag (when building with Clang) to ensure -we do not regress in the future. 
-</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> -<a name="ci_class_struct">Use of <tt>class</tt> and <tt>struct</tt> Keywords</a> -</h4> -<div> - -<p>In C++, the <tt>class</tt> and <tt>struct</tt> keywords can be used almost -interchangeably. The only difference is when they are used to declare a class: -<tt>class</tt> makes all members private by default while <tt>struct</tt> makes -all members public by default.</p> - -<p>Unfortunately, not all compilers follow the rules and some will generate -different symbols based on whether <tt>class</tt> or <tt>struct</tt> was used to -declare the symbol. This can lead to problems at link time.</p> - -<p>So, the rule for LLVM is to always use the <tt>class</tt> keyword, unless -<b>all</b> members are public and the type is a C++ -<a href="http://en.wikipedia.org/wiki/Plain_old_data_structure">POD</a> type, in -which case <tt>struct</tt> is allowed.</p> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="styleissues">Style Issues</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<!-- ======================================================================= --> -<h3> - <a name="macro">The High-Level Issues</a> -</h3> -<!-- ======================================================================= --> - -<div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_module">A Public Header File <b>is</b> a Module</a> -</h4> - -<div> - -<p>C++ doesn't do too well in the modularity department. There is no real -encapsulation or data hiding (unless you use expensive protocol classes), but it -is what we have to work with. 
When you write a public header file (in the LLVM -source tree, they live in the top level "<tt>include</tt>" directory), you are -defining a module of functionality.</p> - -<p>Ideally, modules should be completely independent of each other, and their -header files should only <tt>#include</tt> the absolute minimum number of -headers possible. A module is not just a class, a function, or a -namespace: <a href="http://www.cuj.com/articles/2000/0002/0002c/0002c.htm">it's -a collection of these</a> that defines an interface. This interface may be -several functions, classes, or data structures, but the important issue is how -they work together.</p> - -<p>In general, a module should be implemented by one or more <tt>.cpp</tt> -files. Each of these <tt>.cpp</tt> files should include the header that defines -their interface first. This ensures that all of the dependences of the module -header have been properly added to the module header itself, and are not -implicit. System headers should be included after user headers for a -translation unit.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_dontinclude"><tt>#include</tt> as Little as Possible</a> -</h4> - -<div> - -<p><tt>#include</tt> hurts compile time performance. Don't do it unless you -have to, especially in header files.</p> - -<p>But wait! Sometimes you need to have the definition of a class to use it, or -to inherit from it. In these cases go ahead and <tt>#include</tt> that header -file. Be aware however that there are many cases where you don't need to have -the full definition of a class. If you are using a pointer or reference to a -class, you don't need the header file. If you are simply returning a class -instance from a prototyped function or method, you don't need it. In fact, for -most cases, you simply don't need the definition of a class. 
And not -<tt>#include</tt>'ing speeds up compilation.</p> - -<p>It is easy to try to go too overboard on this recommendation, however. You -<b>must</b> include all of the header files that you are using — you can -include them either directly or indirectly (through another header file). To -make sure that you don't accidentally forget to include a header file in your -module header, make sure to include your module header <b>first</b> in the -implementation file (as mentioned above). This way there won't be any hidden -dependencies that you'll find out about later.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_privateheaders">Keep "Internal" Headers Private</a> -</h4> - -<div> - -<p>Many modules have a complex implementation that causes them to use more than -one implementation (<tt>.cpp</tt>) file. It is often tempting to put the -internal communication interface (helper classes, extra functions, etc) in the -public module header file. Don't do this!</p> - -<p>If you really need to do something like this, put a private header file in -the same directory as the source files, and include it locally. This ensures -that your private interface remains private and undisturbed by outsiders.</p> - -<p>Note however, that it's okay to put extra implementation methods in a public -class itself. Just make them private (or protected) and all is well.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_earlyexit">Use Early Exits and <tt>continue</tt> to Simplify Code</a> -</h4> - -<div> - -<p>When reading code, keep in mind how much state and how many previous -decisions have to be remembered by the reader to understand a block of code. -Aim to reduce indentation where possible when it doesn't make it more difficult -to understand the code. 
One great way to do this is by making use of early -exits and the <tt>continue</tt> keyword in long loops. As an example of using -an early exit from a function, consider this "bad" code:</p> - -<div class="doc_code"> -<pre> -Value *DoSomething(Instruction *I) { - if (!isa<TerminatorInst>(I) && - I->hasOneUse() && SomeOtherThing(I)) { - ... some long code .... - } - - return 0; -} -</pre> -</div> - -<p>This code has several problems if the body of the '<tt>if</tt>' is large. -When you're looking at the top of the function, it isn't immediately clear that -this <em>only</em> does interesting things with non-terminator instructions, and -only applies to things with the other predicates. Second, it is relatively -difficult to describe (in comments) why these predicates are important because -the <tt>if</tt> statement makes it difficult to lay out the comments. Third, -when you're deep within the body of the code, it is indented an extra level. -Finally, when reading the top of the function, it isn't clear what the result is -if the predicate isn't true; you have to read to the end of the function to know -that it returns null.</p> - -<p>It is much preferred to format the code like this:</p> - -<div class="doc_code"> -<pre> -Value *DoSomething(Instruction *I) { - // Terminators never need 'something' done to them because ... - if (isa<TerminatorInst>(I)) - return 0; - - // We conservatively avoid transforming instructions with multiple uses - // because goats like cheese. - if (!I->hasOneUse()) - return 0; - - // This is really just here for example. - if (!SomeOtherThing(I)) - return 0; - - ... some long code .... -} -</pre> -</div> - -<p>This fixes these problems. A similar problem frequently happens in <tt>for</tt> -loops. 
A silly example is something like this:</p> - -<div class="doc_code"> -<pre> - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(II)) { - Value *LHS = BO->getOperand(0); - Value *RHS = BO->getOperand(1); - if (LHS != RHS) { - ... - } - } - } -</pre> -</div> - -<p>When you have very, very small loops, this sort of structure is fine. But if -it exceeds more than 10-15 lines, it becomes difficult for people to read and -understand at a glance. The problem with this sort of code is that it gets very -nested very quickly. Meaning that the reader of the code has to keep a lot of -context in their brain to remember what is going immediately on in the loop, -because they don't know if/when the <tt>if</tt> conditions will have elses etc. -It is strongly preferred to structure the loop like this:</p> - -<div class="doc_code"> -<pre> - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - BinaryOperator *BO = dyn_cast<BinaryOperator>(II); - if (!BO) continue; - - Value *LHS = BO->getOperand(0); - Value *RHS = BO->getOperand(1); - if (LHS == RHS) continue; - - ... - } -</pre> -</div> - -<p>This has all the benefits of using early exits for functions: it reduces -nesting of the loop, it makes it easier to describe why the conditions are true, -and it makes it obvious to the reader that there is no <tt>else</tt> coming up -that they have to push context into their brain for. 
If a loop is large, this -can be a big understandability win.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_else_after_return">Don't use <tt>else</tt> after a <tt>return</tt></a> -</h4> - -<div> - -<p>For similar reasons above (reduction of indentation and easier reading), -please do not use '<tt>else</tt>' or '<tt>else if</tt>' after something that -interrupts control flow — like <tt>return</tt>, <tt>break</tt>, -<tt>continue</tt>, <tt>goto</tt>, etc. For example, this is <em>bad</em>:</p> - -<div class="doc_code"> -<pre> - case 'J': { - if (Signed) { - Type = Context.getsigjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_sigjmp_buf; - return QualType(); - <b>} else { - break; - }</b> - } else { - Type = Context.getjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_jmp_buf; - return QualType(); - <b>} else { - break; - }</b> - } - } - } -</pre> -</div> - -<p>It is better to write it like this:</p> - -<div class="doc_code"> -<pre> - case 'J': - if (Signed) { - Type = Context.getsigjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_sigjmp_buf; - return QualType(); - } - } else { - Type = Context.getjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_jmp_buf; - return QualType(); - } - } - <b>break;</b> -</pre> -</div> - -<p>Or better yet (in this case) as:</p> - -<div class="doc_code"> -<pre> - case 'J': - if (Signed) - Type = Context.getsigjmp_bufType(); - else - Type = Context.getjmp_bufType(); - - if (Type.isNull()) { - Error = Signed ? 
ASTContext::GE_Missing_sigjmp_buf : - ASTContext::GE_Missing_jmp_buf; - return QualType(); - } - <b>break;</b> -</pre> -</div> - -<p>The idea is to reduce indentation and the amount of code you have to keep -track of when reading the code.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="hl_predicateloops">Turn Predicate Loops into Predicate Functions</a> -</h4> - -<div> - -<p>It is very common to write small loops that just compute a boolean value. -There are a number of ways that people commonly write these, but an example of -this sort of thing is:</p> - -<div class="doc_code"> -<pre> - <b>bool FoundFoo = false;</b> - for (unsigned i = 0, e = BarList.size(); i != e; ++i) - if (BarList[i]->isFoo()) { - <b>FoundFoo = true;</b> - break; - } - - <b>if (FoundFoo) {</b> - ... - } -</pre> -</div> - -<p>This sort of code is awkward to write, and is almost always a bad sign. -Instead of this sort of loop, we strongly prefer to use a predicate function -(which may be <a href="#micro_anonns">static</a>) that uses -<a href="#hl_earlyexit">early exits</a> to compute the predicate. We prefer -the code to be structured like this:</p> - -<div class="doc_code"> -<pre> -/// ListContainsFoo - Return true if the specified list has an element that is -/// a foo. -static bool ListContainsFoo(const std::vector<Bar*> &List) { - for (unsigned i = 0, e = List.size(); i != e; ++i) - if (List[i]->isFoo()) - return true; - return false; -} -... - - <b>if (ListContainsFoo(BarList)) {</b> - ... - } -</pre> -</div> - -<p>There are many reasons for doing this: it reduces indentation and factors out -code which can often be shared by other code that checks for the same predicate. -More importantly, it <em>forces you to pick a name</em> for the function, and -forces you to write a comment for it. In this silly example, this doesn't add -much value. 
However, if the condition is complex, this can make it a lot easier -for the reader to understand the code that queries for this predicate. Instead -of being faced with the in-line details of how we check to see if the BarList -contains a foo, we can trust the function name and continue reading with better -locality.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="micro">The Low-Level Issues</a> -</h3> -<!-- ======================================================================= --> - -<div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_naming"> - Name Types, Functions, Variables, and Enumerators Properly - </a> -</h4> - -<div> - -<p>Poorly-chosen names can mislead the reader and cause bugs. We cannot stress -enough how important it is to use <em>descriptive</em> names. Pick names that -match the semantics and role of the underlying entities, within reason. Avoid -abbreviations unless they are well known. After picking a good name, make sure -to use consistent capitalization for the name, as inconsistency requires clients -to either memorize the APIs or to look it up to find the exact spelling.</p> - -<p>In general, names should be in camel case (e.g. <tt>TextFileReader</tt> -and <tt>isLValue()</tt>). Different kinds of declarations have different -rules:</p> - -<ul> -<li><p><b>Type names</b> (including classes, structs, enums, typedefs, etc) - should be nouns and start with an upper-case letter (e.g. - <tt>TextFileReader</tt>).</p></li> - -<li><p><b>Variable names</b> should be nouns (as they represent state). The - name should be camel case, and start with an upper case letter (e.g. - <tt>Leader</tt> or <tt>Boats</tt>).</p></li> - -<li><p><b>Function names</b> should be verb phrases (as they represent - actions), and command-like function should be imperative. 
The name should - be camel case, and start with a lower case letter (e.g. <tt>openFile()</tt> - or <tt>isFoo()</tt>).</p></li> - -<li><p><b>Enum declarations</b> (e.g. <tt>enum Foo {...}</tt>) are types, so - they should follow the naming conventions for types. A common use for enums - is as a discriminator for a union, or an indicator of a subclass. When an - enum is used for something like this, it should have a <tt>Kind</tt> suffix - (e.g. <tt>ValueKind</tt>).</p></li> - -<li><p><b>Enumerators</b> (e.g. <tt>enum { Foo, Bar }</tt>) and <b>public member - variables</b> should start with an upper-case letter, just like types. - Unless the enumerators are defined in their own small namespace or inside a - class, enumerators should have a prefix corresponding to the enum - declaration name. For example, <tt>enum ValueKind { ... };</tt> may contain - enumerators like <tt>VK_Argument</tt>, <tt>VK_BasicBlock</tt>, etc. - Enumerators that are just convenience constants are exempt from the - requirement for a prefix. For instance:</p> - -<div class="doc_code"> -<pre> -enum { - MaxSize = 42, - Density = 12 -}; -</pre> -</div> -</li> - -</ul> - -<p>As an exception, classes that mimic STL classes can have member names in -STL's style of lower-case words separated by underscores (e.g. <tt>begin()</tt>, -<tt>push_back()</tt>, and <tt>empty()</tt>).</p> - -<p>Here are some examples of good and bad names:</p> - -<div class="doc_code"> -<pre> -class VehicleMaker { - ... - Factory<Tire> F; // Bad -- abbreviation and non-descriptive. - Factory<Tire> Factory; // Better. - Factory<Tire> TireFactory; // Even better -- if VehicleMaker has more than one - // kind of factories. -}; - -Vehicle MakeVehicle(VehicleType Type) { - VehicleMaker M; // Might be OK if having a short life-span. - Tire tmp1 = M.makeTire(); // Bad -- 'tmp1' provides no information. - Light headlight = M.makeLight("head"); // Good -- descriptive. - ... 
-} -</pre> -</div> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_assert">Assert Liberally</a> -</h4> - -<div> - -<p>Use the "<tt>assert</tt>" macro to its fullest. Check all of your -preconditions and assumptions, you never know when a bug (not necessarily even -yours) might be caught early by an assertion, which reduces debugging time -dramatically. The "<tt><cassert></tt>" header file is probably already -included by the header files you are using, so it doesn't cost anything to use -it.</p> - -<p>To further assist with debugging, make sure to put some kind of error message -in the assertion statement, which is printed if the assertion is tripped. This -helps the poor debugger make sense of why an assertion is being made and -enforced, and hopefully what to do about it. Here is one complete example:</p> - -<div class="doc_code"> -<pre> -inline Value *getOperand(unsigned i) { - assert(i < Operands.size() && "getOperand() out of range!"); - return Operands[i]; -} -</pre> -</div> - -<p>Here are more examples:</p> - -<div class="doc_code"> -<pre> -assert(Ty->isPointerType() && "Can't allocate a non pointer type!"); - -assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); - -assert(idx < getNumSuccessors() && "Successor # out of range!"); - -assert(V1.getType() == V2.getType() && "Constant types must be identical!"); - -assert(isa<PHINode>(Succ->front()) && "Only works on PHId BBs!"); -</pre> -</div> - -<p>You get the idea.</p> - -<p>Please be aware that, when adding assert statements, not all compilers are aware of -the semantics of the assert. In some places, asserts are used to indicate a piece of -code that should not be reached. 
These are typically of the form:</p> - -<div class="doc_code"> -<pre> -assert(0 && "Some helpful error message"); -</pre> -</div> - -<p>When used in a function that returns a value, they should be followed with a return -statement and a comment indicating that this line is never reached. This will prevent -a compiler which is unable to deduce that the assert statement never returns from -generating a warning.</p> - -<div class="doc_code"> -<pre> -assert(0 && "Some helpful error message"); -// Not reached -return 0; -</pre> -</div> - -<p>Another issue is that values used only by assertions will produce an "unused -value" warning when assertions are disabled. For example, this code will -warn:</p> - -<div class="doc_code"> -<pre> -unsigned Size = V.size(); -assert(Size > 42 && "Vector smaller than it should be"); - -bool NewToSet = Myset.insert(Value); -assert(NewToSet && "The value shouldn't be in the set yet"); -</pre> -</div> - -<p>These are two interesting different cases. In the first case, the call to -V.size() is only useful for the assert, and we don't want it executed when -assertions are disabled. Code like this should move the call into the assert -itself. In the second case, the side effects of the call must happen whether -the assert is enabled or not. In this case, the value should be cast to void to -disable the warning. 
To be specific, it is preferred to write the code like -this:</p> - -<div class="doc_code"> -<pre> -assert(V.size() > 42 && "Vector smaller than it should be"); - -bool NewToSet = Myset.insert(Value); (void)NewToSet; -assert(NewToSet && "The value shouldn't be in the set yet"); -</pre> -</div> - - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_ns_std">Do Not Use '<tt>using namespace std</tt>'</a> -</h4> - -<div> - -<p>In LLVM, we prefer to explicitly prefix all identifiers from the standard -namespace with an "<tt>std::</tt>" prefix, rather than rely on -"<tt>using namespace std;</tt>".</p> - -<p> In header files, adding a '<tt>using namespace XXX</tt>' directive pollutes -the namespace of any source file that <tt>#include</tt>s the header. This is -clearly a bad thing.</p> - -<p>In implementation files (e.g. <tt>.cpp</tt> files), the rule is more of a stylistic -rule, but is still important. Basically, using explicit namespace prefixes -makes the code <b>clearer</b>, because it is immediately obvious what facilities -are being used and where they are coming from. And <b>more portable</b>, because -namespace clashes cannot occur between LLVM code and other namespaces. The -portability rule is important because different standard library implementations -expose different symbols (potentially ones they shouldn't), and future revisions -to the C++ standard will add more symbols to the <tt>std</tt> namespace. As -such, we never use '<tt>using namespace std;</tt>' in LLVM.</p> - -<p>The exception to the general rule (i.e. it's not an exception for -the <tt>std</tt> namespace) is for implementation files. For example, all of -the code in the LLVM project implements code that lives in the 'llvm' namespace. -As such, it is ok, and actually clearer, for the <tt>.cpp</tt> files to have a -'<tt>using namespace llvm;</tt>' directive at the top, after the -<tt>#include</tt>s. 
This reduces indentation in the body of the file for source -editors that indent based on braces, and keeps the conceptual context cleaner. -The general form of this rule is that any <tt>.cpp</tt> file that implements -code in any namespace may use that namespace (and its parents'), but should not -use any others.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_virtual_anch"> - Provide a Virtual Method Anchor for Classes in Headers - </a> -</h4> - -<div> - -<p>If a class is defined in a header file and has a v-table (either it has -virtual methods or it derives from classes with virtual methods), it must -always have at least one out-of-line virtual method in the class. Without -this, the compiler will copy the vtable and RTTI into every <tt>.o</tt> file -that <tt>#include</tt>s the header, bloating <tt>.o</tt> file sizes and -increasing link times.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_end">Don't evaluate <tt>end()</tt> every time through a loop</a> -</h4> - -<div> - -<p>Because C++ doesn't have a standard "<tt>foreach</tt>" loop (though it can be -emulated with macros and may be coming in C++'0x) we end up writing a lot of -loops that manually iterate from begin to end on a variety of containers or -through other data structures. One common mistake is to write a loop in this -style:</p> - -<div class="doc_code"> -<pre> - BasicBlock *BB = ... - for (BasicBlock::iterator I = BB->begin(); I != <b>BB->end()</b>; ++I) - ... use I ... -</pre> -</div> - -<p>The problem with this construct is that it evaluates "<tt>BB->end()</tt>" -every time through the loop. Instead of writing the loop like this, we strongly -prefer loops to be written so that they evaluate it once before the loop starts. -A convenient way to do this is like so:</p> - -<div class="doc_code"> -<pre> - BasicBlock *BB = ... 
- for (BasicBlock::iterator I = BB->begin(), E = <b>BB->end()</b>; I != E; ++I) - ... use I ... -</pre> -</div> - -<p>The observant may quickly point out that these two loops may have different -semantics: if the container (a basic block in this case) is being mutated, then -"<tt>BB->end()</tt>" may change its value every time through the loop and the -second loop may not in fact be correct. If you actually do depend on this -behavior, please write the loop in the first form and add a comment indicating -that you did it intentionally.</p> - -<p>Why do we prefer the second form (when correct)? Writing the loop in the -first form has two problems. First it may be less efficient than evaluating it -at the start of the loop. In this case, the cost is probably minor — a -few extra loads every time through the loop. However, if the base expression is -more complex, then the cost can rise quickly. I've seen loops where the end -expression was actually something like: "<tt>SomeMap[x]->end()</tt>" and map -lookups really aren't cheap. By writing it in the second form consistently, you -eliminate the issue entirely and don't even have to think about it.</p> - -<p>The second (even bigger) issue is that writing the loop in the first form -hints to the reader that the loop is mutating the container (a fact that a -comment would handily confirm!). 
If you write the loop in the second form, it -is immediately obvious without even looking at the body of the loop that the -container isn't being modified, which makes it easier to read the code and -understand what it does.</p> - -<p>While the second form of the loop is a few extra keystrokes, we do strongly -prefer it.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_iostream"><tt>#include <iostream></tt> is Forbidden</a> -</h4> - -<div> - -<p>The use of <tt>#include <iostream></tt> in library files is -hereby <b><em>forbidden</em></b>, because many common implementations -transparently inject a <a href="#ci_static_ctors">static constructor</a> into -every translation unit that includes it.</p> - -<p>Note that using the other stream headers (<tt><sstream></tt> for -example) is not problematic in this regard — -just <tt><iostream></tt>. However, <tt>raw_ostream</tt> provides various -APIs that are better performing for almost every use than <tt>std::ostream</tt> -style APIs. <b>Therefore new code should always -use <a href="#ll_raw_ostream"><tt>raw_ostream</tt></a> for writing, or -the <tt>llvm::MemoryBuffer</tt> API for reading files.</b></p> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_raw_ostream">Use <tt>raw_ostream</tt></a> -</h4> - -<div> - -<p>LLVM includes a lightweight, simple, and efficient stream implementation -in <tt>llvm/Support/raw_ostream.h</tt>, which provides all of the common -features of <tt>std::ostream</tt>. All new code should use <tt>raw_ostream</tt> -instead of <tt>ostream</tt>.</p> - -<p>Unlike <tt>std::ostream</tt>, <tt>raw_ostream</tt> is not a template and can -be forward declared as <tt>class raw_ostream</tt>. 
Public headers should -generally not include the <tt>raw_ostream</tt> header, but use forward -declarations and constant references to <tt>raw_ostream</tt> instances.</p> - -</div> - - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ll_avoidendl">Avoid <tt>std::endl</tt></a> -</h4> - -<div> - -<p>The <tt>std::endl</tt> modifier, when used with <tt>iostreams</tt> outputs a -newline to the output stream specified. In addition to doing this, however, it -also flushes the output stream. In other words, these are equivalent:</p> - -<div class="doc_code"> -<pre> -std::cout << std::endl; -std::cout << '\n' << std::flush; -</pre> -</div> - -<p>Most of the time, you probably have no reason to flush the output stream, so -it's better to use a literal <tt>'\n'</tt>.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="nano">Microscopic Details</a> -</h3> -<!-- ======================================================================= --> - -<div> - -<p>This section describes preferred low-level formatting guidelines along with -reasoning on why we prefer them.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="micro_spaceparen">Spaces Before Parentheses</a> -</h4> - -<div> - -<p>We prefer to put a space before an open parenthesis only in control flow -statements, but not in normal function call expressions and function-like -macros. For example, this is good:</p> - -<div class="doc_code"> -<pre> -<b>if (</b>x) ... -<b>for (</b>i = 0; i != 100; ++i) ... -<b>while (</b>llvm_rocks) ... - -<b>somefunc(</b>42); -<b><a href="#ll_assert">assert</a>(</b>3 != 4 && "laws of math are failing me"); - -a = <b>foo(</b>42, 92) + <b>bar(</b>x); -</pre> -</div> - -<p>and this is bad:</p> - -<div class="doc_code"> -<pre> -<b>if(</b>x) ... -<b>for(</b>i = 0; i != 100; ++i) ... -<b>while(</b>llvm_rocks) ... 
- -<b>somefunc (</b>42); -<b><a href="#ll_assert">assert</a> (</b>3 != 4 && "laws of math are failing me"); - -a = <b>foo (</b>42, 92) + <b>bar (</b>x); -</pre> -</div> - -<p>The reason for doing this is not completely arbitrary. This style makes -control flow operators stand out more, and makes expressions flow better. The -function call operator binds very tightly as a postfix operator. Putting a -space after a function name (as in the last example) makes it appear that the -code might bind the arguments of the left-hand-side of a binary operator with -the argument list of a function and the name of the right side. More -specifically, it is easy to misread the "a" example as:</p> - -<div class="doc_code"> -<pre> -a = foo <b>(</b>(42, 92) + bar<b>)</b> (x); -</pre> -</div> - -<p>when skimming through the code. By avoiding a space in a function, we avoid -this misinterpretation.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="micro_preincrement">Prefer Preincrement</a> -</h4> - -<div> - -<p>Hard fast rule: Preincrement (<tt>++X</tt>) may be no slower than -postincrement (<tt>X++</tt>) and could very well be a lot faster than it. Use -preincrementation whenever possible.</p> - -<p>The semantics of postincrement include making a copy of the value being -incremented, returning it, and then preincrementing the "work value". For -primitive types, this isn't a big deal... but for iterators, it can be a huge -issue (for example, some iterators contains stack and set objects in them... -copying an iterator could invoke the copy ctor's of these as well). In general, -get in the habit of always using preincrement, and you won't have a problem.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="micro_namespaceindent">Namespace Indentation</a> -</h4> - -<div> - -<p> -In general, we strive to reduce indentation wherever possible. 
This is useful -because we want code to <a href="#scf_codewidth">fit into 80 columns</a> without -wrapping horribly, but also because it makes it easier to understand the code. -Namespaces are a funny thing: they are often large, and we often desire to put -lots of stuff into them (so they can be large). Other times they are tiny, -because they just hold an enum or something similar. In order to balance this, -we use different approaches for small versus large namespaces. -</p> - -<p> -If a namespace definition is small and <em>easily</em> fits on a screen (say, -less than 35 lines of code), then you should indent its body. Here's an -example: -</p> - -<div class="doc_code"> -<pre> -namespace llvm { - namespace X86 { - /// RelocationType - An enum for the x86 relocation codes. Note that - /// the terminology here doesn't follow x86 convention - word means - /// 32-bit and dword means 64-bit. - enum RelocationType { - /// reloc_pcrel_word - PC relative relocation, add the relocated value to - /// the value already in memory, after we adjust it for where the PC is. - reloc_pcrel_word = 0, - - /// reloc_picrel_word - PIC base relative relocation, add the relocated - /// value to the value already in memory, after we adjust it for where the - /// PIC base is. - reloc_picrel_word = 1, - - /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just - /// add the relocated value to the value already in memory. - reloc_absolute_word = 2, - reloc_absolute_dword = 3 - }; - } -} -</pre> -</div> - -<p>Since the body is small, indenting adds value because it makes it very clear -where the namespace starts and ends, and it is easy to take the whole thing in -in one "gulp" when reading the code. If the blob of code in the namespace is -larger (as it typically is in a header in the <tt>llvm</tt> or <tt>clang</tt> namespaces), do not -indent the code, and add a comment indicating what namespace is being closed. 
-For example:</p> - -<div class="doc_code"> -<pre> -namespace llvm { -namespace knowledge { - -/// Grokable - This class represents things that Smith can have an intimate -/// understanding of and contains the data associated with it. -class Grokable { -... -public: - explicit Grokable() { ... } - virtual ~Grokable() = 0; - - ... - -}; - -} // end namespace knowledge -} // end namespace llvm -</pre> -</div> - -<p>Because the class is large, we don't expect that the reader can easily -understand the entire concept in a glance, and the end of the file (where the -namespaces end) may be a long ways away from the place they open. As such, -indenting the contents of the namespace doesn't add any value, and detracts from -the readability of the class. In these cases it is best to <em>not</em> indent -the contents of the namespace.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="micro_anonns">Anonymous Namespaces</a> -</h4> - -<div> - -<p>After talking about namespaces in general, you may be wondering about -anonymous namespaces in particular. -Anonymous namespaces are a great language feature that tells the C++ compiler -that the contents of the namespace are only visible within the current -translation unit, allowing more aggressive optimization and eliminating the -possibility of symbol name collisions. Anonymous namespaces are to C++ as -"static" is to C functions and global variables. 
While "static" is available -in C++, anonymous namespaces are more general: they can make entire classes -private to a file.</p> - -<p>The problem with anonymous namespaces is that they naturally want to -encourage indentation of their body, and they reduce locality of reference: if -you see a random function definition in a C++ file, it is easy to see if it is -marked static, but seeing if it is in an anonymous namespace requires scanning -a big chunk of the file.</p> - -<p>Because of this, we have a simple guideline: make anonymous namespaces as -small as possible, and only use them for class declarations. For example, this -is good:</p> - -<div class="doc_code"> -<pre> -<b>namespace {</b> - class StringSort { - ... - public: - StringSort(...) - bool operator<(const char *RHS) const; - }; -<b>} // end anonymous namespace</b> - -static void Helper() { - ... -} - -bool StringSort::operator<(const char *RHS) const { - ... -} - -</pre> -</div> - -<p>This is bad:</p> - - -<div class="doc_code"> -<pre> -<b>namespace {</b> -class StringSort { -... -public: - StringSort(...) - bool operator<(const char *RHS) const; -}; - -void Helper() { - ... -} - -bool StringSort::operator<(const char *RHS) const { - ... -} - -<b>} // end anonymous namespace</b> - -</pre> -</div> - - -<p>This is bad specifically because if you're looking at "Helper" in the middle -of a large C++ file, that you have no immediate way to tell if it is local to -the file. When it is marked static explicitly, this is immediately obvious. -Also, there is no reason to enclose the definition of "operator<" in the -namespace just because it was declared there. -</p> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="seealso">See Also</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>A lot of these comments and recommendations have been culled for other -sources. 
Two particularly important books for our work are:</p> - -<ol> - -<li><a href="http://www.amazon.com/Effective-Specific-Addison-Wesley-Professional-Computing/dp/0321334876">Effective -C++</a> by Scott Meyers. Also -interesting and useful are "More Effective C++" and "Effective STL" by the same -author.</li> - -<li>Large-Scale C++ Software Design by John Lakos</li> - -</ol> - -<p>If you get some free time, and you haven't read them: do so, you might learn -something.</p> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst new file mode 100644 index 0000000000..a416a1e856 --- /dev/null +++ b/docs/CodingStandards.rst @@ -0,0 +1,1147 @@ +.. _coding_standards: + +===================== +LLVM Coding Standards +===================== + +.. contents:: + :local: + +Introduction +============ + +This document attempts to describe a few coding standards that are being used in +the LLVM source tree. Although no coding standards should be regarded as +absolute requirements to be followed in all instances, coding standards are +particularly important for large-scale code bases that follow a library-based +design (like LLVM). + +This document intentionally does not prescribe fixed standards for religious +issues such as brace placement and space usage. For issues like this, follow +the golden rule: + +.. 
_Golden Rule: + + **If you are extending, enhancing, or bug fixing already implemented code, + use the style that is already being used so that the source is uniform and + easy to follow.** + +Note that some code bases (e.g. ``libc++``) have really good reasons to deviate +from the coding standards. In the case of ``libc++``, this is because the +naming and other conventions are dictated by the C++ standard. If you think +there is a specific good reason to deviate from the standards here, please bring +it up on the LLVMdev mailing list. + +There are some conventions that are not uniformly followed in the code base +(e.g. the naming convention). This is because they are relatively new, and a +lot of code was written before they were put in place. Our long term goal is +for the entire codebase to follow the convention, but we explicitly *do not* +want patches that do large-scale reformating of existing code. On the other +hand, it is reasonable to rename the methods of a class if you're about to +change it in some other way. Just do the reformating as a separate commit from +the functionality change. + +The ultimate goal of these guidelines is the increase readability and +maintainability of our common source base. If you have suggestions for topics to +be included, please mail them to `Chris <mailto:sabre@nondot.org>`_. + +Mechanical Source Issues +======================== + +Source Code Formatting +---------------------- + +Commenting +^^^^^^^^^^ + +Comments are one critical part of readability and maintainability. Everyone +knows they should comment their code, and so should you. When writing comments, +write them as English prose, which means they should use proper capitalization, +punctuation, etc. Aim to describe what the code is trying to do and why, not +*how* it does it at a micro level. Here are a few critical things to document: + +.. 
_header file comment: + +File Headers +"""""""""""" + +Every source file should have a header on it that describes the basic purpose of +the file. If a file does not have a header, it should not be checked into the +tree. The standard header looks like this: + +.. code-block:: c++ + + //===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===// + // + // The LLVM Compiler Infrastructure + // + // This file is distributed under the University of Illinois Open Source + // License. See LICENSE.TXT for details. + // + //===----------------------------------------------------------------------===// + // + // This file contains the declaration of the Instruction class, which is the + // base class for all of the VM instructions. + // + //===----------------------------------------------------------------------===// + +A few things to note about this particular format: The "``-*- C++ -*-``" string +on the first line is there to tell Emacs that the source file is a C++ file, not +a C file (Emacs assumes ``.h`` files are C files by default). + +.. note:: + + This tag is not necessary in ``.cpp`` files. The name of the file is also + on the first line, along with a very short description of the purpose of the + file. This is important when printing out code and flipping though lots of + pages. + +The next section in the file is a concise note that defines the license that the +file is released under. This makes it perfectly clear what terms the source +code can be distributed under and should not be modified in any way. + +The main body of the description does not have to be very long in most cases. +Here it's only two lines. If an algorithm is being implemented or something +tricky is going on, a reference to the paper where it is published should be +included, as well as any notes or *gotchas* in the code to watch out for. + +Class overviews +""""""""""""""" + +Classes are one fundamental part of a good object oriented design. 
As such, a +class definition should have a comment block that explains what the class is +used for and how it works. Every non-trivial class is expected to have a +``doxygen`` comment block. + +Method information +"""""""""""""""""" + +Methods defined in a class (as well as any global functions) should also be +documented properly. A quick note about what it does and a description of the +borderline behaviour is all that is necessary here (unless something +particularly tricky or insidious is going on). The hope is that people can +figure out how to use your interfaces without reading the code itself. + +Good things to talk about here are what happens when something unexpected +happens: does the method return null? Abort? Format your hard disk? + +Comment Formatting +^^^^^^^^^^^^^^^^^^ + +In general, prefer C++ style (``//``) comments. They take less space, require +less typing, don't have nesting problems, etc. There are a few cases when it is +useful to use C style (``/* */``) comments however: + +#. When writing C code: Obviously if you are writing C code, use C style + comments. + +#. When writing a header file that may be ``#include``\d by a C source file. + +#. When writing a source file that is used by a tool that only accepts C style + comments. + +To comment out a large block of code, use ``#if 0`` and ``#endif``. These nest +properly and are better behaved in general than C style comments. + +``#include`` Style +^^^^^^^^^^^^^^^^^^ + +Immediately after the `header file comment`_ (and include guards if working on a +header file), the `minimal list of #includes`_ required by the file should be +listed. We prefer these ``#include``\s to be listed in this order: + +.. _Main Module Header: +.. _Local/Private Headers: + +#. Main Module Header +#. Local/Private Headers +#. ``llvm/*`` +#. ``llvm/Analysis/*`` +#. ``llvm/Assembly/*`` +#. ``llvm/Bitcode/*`` +#. ``llvm/CodeGen/*`` +#. ... +#. ``llvm/Support/*`` +#. ``llvm/Config/*`` +#. 
System ``#include``\s + +and each category should be sorted by name. + +The `Main Module Header`_ file applies to ``.cpp`` files which implement an +interface defined by a ``.h`` file. This ``#include`` should always be included +**first** regardless of where it lives on the file system. By including a +header file first in the ``.cpp`` files that implement the interfaces, we ensure +that the header does not have any hidden dependencies which are not explicitly +``#include``\d in the header, but should be. It is also a form of documentation +in the ``.cpp`` file to indicate where the interfaces it implements are defined. + +.. _fit into 80 columns: + +Source Code Width +^^^^^^^^^^^^^^^^^ + +Write your code to fit within 80 columns of text. This helps those of us who +like to print out code and look at your code in an ``xterm`` without resizing +it. + +The longer answer is that there must be some limit to the width of the code in +order to reasonably allow developers to have multiple files side-by-side in +windows on a modest display. If you are going to pick a width limit, it is +somewhat arbitrary but you might as well pick something standard. Going with 90 +columns (for example) instead of 80 columns wouldn't add any significant value +and would be detrimental to printing out code. Also many other projects have +standardized on 80 columns, so some people have already configured their editors +for it (vs something else, like 90 columns). + +This is one of many contentious issues in coding standards, but it is not up for +debate. + +Use Spaces Instead of Tabs +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In all cases, prefer spaces to tabs in source files. People have different +preferred indentation levels, and different styles of indentation that they +like; this is fine. What isn't fine is that different editors/viewers expand +tabs out to different tab stops. This can cause your code to look completely +unreadable, and it is not worth dealing with. 
+ +As always, follow the `Golden Rule`_ above: follow the style of +existing code if you are modifying and extending it. If you like four spaces of +indentation, **DO NOT** do that in the middle of a chunk of code with two spaces +of indentation. Also, do not reindent a whole source file: it makes for +incredible diffs that are absolutely worthless. + +Indent Code Consistently +^^^^^^^^^^^^^^^^^^^^^^^^ + +Okay, in your first year of programming you were told that indentation is +important. If you didn't believe and internalize this then, now is the time. +Just do it. + +Compiler Issues +--------------- + +Treat Compiler Warnings Like Errors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your code has compiler warnings in it, something is wrong --- you aren't +casting values correctly, you have "questionable" constructs in your code, or +you are doing something legitimately wrong. Compiler warnings can cover up +legitimate errors in output and make dealing with a translation unit difficult. + +It is not possible to prevent all warnings from all compilers, nor is it +desirable. Instead, pick a standard compiler (like ``gcc``) that provides a +good thorough set of warnings, and stick to it. At least in the case of +``gcc``, it is possible to work around any spurious errors by changing the +syntax of the code slightly. For example, a warning that annoys me occurs when +I write code like this: + +.. code-block:: c++ + + if (V = getValue()) { + ... + } + +``gcc`` will warn me that I probably want to use the ``==`` operator, and that I +probably mistyped it. In most cases, I haven't, and I really don't want the +spurious errors. To fix this particular problem, I rewrite the code like +this: + +.. code-block:: c++ + + if ((V = getValue())) { + ... + } + +which shuts ``gcc`` up. Any ``gcc`` warning that annoys you can be fixed by +massaging the code appropriately. 
+ +Write Portable Code +^^^^^^^^^^^^^^^^^^^ + +In almost all cases, it is possible and within reason to write completely +portable code. If there are cases where it isn't possible to write portable +code, isolate it behind a well defined (and well documented) interface. + +In practice, this means that you shouldn't assume much about the host compiler +(and Visual Studio tends to be the lowest common denominator). If advanced +features are used, they should only be an implementation detail of a library +which has a simple exposed API, and preferably be buried in ``libSystem``. + +Do not use RTTI or Exceptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In an effort to reduce code and executable size, LLVM does not use RTTI +(e.g. ``dynamic_cast<>;``) or exceptions. These two language features violate +the general C++ principle of *"you only pay for what you use"*, causing +executable bloat even if exceptions are never used in the code base, or if RTTI +is never used for a class. Because of this, we turn them off globally in the +code. + +That said, LLVM does make extensive use of a hand-rolled form of RTTI that use +templates like `isa<>, cast<>, and dyn_cast<> <ProgrammersManual.html#isa>`_. +This form of RTTI is opt-in and can be added to any class. It is also +substantially more efficient than ``dynamic_cast<>``. + +.. _static constructor: + +Do not use Static Constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Static constructors and destructors (e.g. global variables whose types have a +constructor or destructor) should not be added to the code base, and should be +removed wherever possible. Besides `well known problems +<http://yosefk.com/c++fqa/ctors.html#fqa-10.12>`_ where the order of +initialization is undefined between globals in different source files, the +entire concept of static constructors is at odds with the common use case of +LLVM as a library linked into a larger application. 
+ +Consider the use of LLVM as a JIT linked into another application (perhaps for +`OpenGL, custom languages <http://llvm.org/Users.html>`_, `shaders in movies +<http://llvm.org/devmtg/2010-11/Gritz-OpenShadingLang.pdf>`_, etc). Due to the +design of static constructors, they must be executed at startup time of the +entire application, regardless of whether or how LLVM is used in that larger +application. There are two problems with this: + +* The time to run the static constructors impacts startup time of applications + --- a critical time for GUI apps, among others. + +* The static constructors cause the app to pull many extra pages of memory off + the disk: both the code for the constructor in each ``.o`` file and the small + amount of data that gets touched. In addition, touched/dirty pages put more + pressure on the VM system on low-memory machines. + +We would really like for there to be zero cost for linking in an additional LLVM +target or other library into an application, but static constructors violate +this goal. + +That said, LLVM unfortunately does contain static constructors. It would be a +`great project <http://llvm.org/PR11944>`_ for someone to purge all static +constructors from LLVM, and then enable the ``-Wglobal-constructors`` warning +flag (when building with Clang) to ensure we do not regress in the future. + +Use of ``class`` and ``struct`` Keywords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In C++, the ``class`` and ``struct`` keywords can be used almost +interchangeably. The only difference is when they are used to declare a class: +``class`` makes all members private by default while ``struct`` makes all +members public by default. + +Unfortunately, not all compilers follow the rules and some will generate +different symbols based on whether ``class`` or ``struct`` was used to declare +the symbol. This can lead to problems at link time. 
+ +So, the rule for LLVM is to always use the ``class`` keyword, unless **all** +members are public and the type is a C++ `POD +<http://en.wikipedia.org/wiki/Plain_old_data_structure>`_ type, in which case +``struct`` is allowed. + +Style Issues +============ + +The High-Level Issues +--------------------- + +A Public Header File **is** a Module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +C++ doesn't do too well in the modularity department. There is no real +encapsulation or data hiding (unless you use expensive protocol classes), but it +is what we have to work with. When you write a public header file (in the LLVM +source tree, they live in the top level "``include``" directory), you are +defining a module of functionality. + +Ideally, modules should be completely independent of each other, and their +header files should only ``#include`` the absolute minimum number of headers +possible. A module is not just a class, a function, or a namespace: it's a +collection of these that defines an interface. This interface may be several +functions, classes, or data structures, but the important issue is how they work +together. + +In general, a module should be implemented by one or more ``.cpp`` files. Each +of these ``.cpp`` files should include the header that defines their interface +first. This ensures that all of the dependences of the module header have been +properly added to the module header itself, and are not implicit. System +headers should be included after user headers for a translation unit. + +.. _minimal list of #includes: + +``#include`` as Little as Possible +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``#include`` hurts compile time performance. Don't do it unless you have to, +especially in header files. + +But wait! Sometimes you need to have the definition of a class to use it, or to +inherit from it. In these cases go ahead and ``#include`` that header file. 
Be +aware however that there are many cases where you don't need to have the full +definition of a class. If you are using a pointer or reference to a class, you +don't need the header file. If you are simply returning a class instance from a +prototyped function or method, you don't need it. In fact, for most cases, you +simply don't need the definition of a class. And not ``#include``\ing speeds up +compilation. + +It is easy to try to go too overboard on this recommendation, however. You +**must** include all of the header files that you are using --- you can include +them either directly or indirectly through another header file. To make sure +that you don't accidentally forget to include a header file in your module +header, make sure to include your module header **first** in the implementation +file (as mentioned above). This way there won't be any hidden dependencies that +you'll find out about later. + +Keep "Internal" Headers Private +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Many modules have a complex implementation that causes them to use more than one +implementation (``.cpp``) file. It is often tempting to put the internal +communication interface (helper classes, extra functions, etc) in the public +module header file. Don't do this! + +If you really need to do something like this, put a private header file in the +same directory as the source files, and include it locally. This ensures that +your private interface remains private and undisturbed by outsiders. + +.. note:: + + It's okay to put extra implementation methods in a public class itself. Just + make them private (or protected) and all is well. + +.. _early exits: + +Use Early Exits and ``continue`` to Simplify Code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When reading code, keep in mind how much state and how many previous decisions +have to be remembered by the reader to understand a block of code. 
Aim to +reduce indentation where possible when it doesn't make it more difficult to +understand the code. One great way to do this is by making use of early exits +and the ``continue`` keyword in long loops. As an example of using an early +exit from a function, consider this "bad" code: + +.. code-block:: c++ + + Value *DoSomething(Instruction *I) { + if (!isa<TerminatorInst>(I) && + I->hasOneUse() && SomeOtherThing(I)) { + ... some long code .... + } + + return 0; + } + +This code has several problems if the body of the ``'if'`` is large. When +you're looking at the top of the function, it isn't immediately clear that this +*only* does interesting things with non-terminator instructions, and only +applies to things with the other predicates. Second, it is relatively difficult +to describe (in comments) why these predicates are important because the ``if`` +statement makes it difficult to lay out the comments. Third, when you're deep +within the body of the code, it is indented an extra level. Finally, when +reading the top of the function, it isn't clear what the result is if the +predicate isn't true; you have to read to the end of the function to know that +it returns null. + +It is much preferred to format the code like this: + +.. code-block:: c++ + + Value *DoSomething(Instruction *I) { + // Terminators never need 'something' done to them because ... + if (isa<TerminatorInst>(I)) + return 0; + + // We conservatively avoid transforming instructions with multiple uses + // because goats like cheese. + if (!I->hasOneUse()) + return 0; + + // This is really just here for example. + if (!SomeOtherThing(I)) + return 0; + + ... some long code .... + } + +This fixes these problems. A similar problem frequently happens in ``for`` +loops. A silly example is something like this: + +.. 
code-block:: c++ + + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(II)) { + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS != RHS) { + ... + } + } + } + +When you have very, very small loops, this sort of structure is fine. But if it +exceeds more than 10-15 lines, it becomes difficult for people to read and +understand at a glance. The problem with this sort of code is that it gets very +nested very quickly. Meaning that the reader of the code has to keep a lot of +context in their brain to remember what is going immediately on in the loop, +because they don't know if/when the ``if`` conditions will have ``else``\s etc. +It is strongly preferred to structure the loop like this: + +.. code-block:: c++ + + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + BinaryOperator *BO = dyn_cast<BinaryOperator>(II); + if (!BO) continue; + + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS == RHS) continue; + + ... + } + +This has all the benefits of using early exits for functions: it reduces nesting +of the loop, it makes it easier to describe why the conditions are true, and it +makes it obvious to the reader that there is no ``else`` coming up that they +have to push context into their brain for. If a loop is large, this can be a +big understandability win. + +Don't use ``else`` after a ``return`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For similar reasons above (reduction of indentation and easier reading), please +do not use ``'else'`` or ``'else if'`` after something that interrupts control +flow --- like ``return``, ``break``, ``continue``, ``goto``, etc. For +example, this is *bad*: + +.. 
code-block:: c++ + + case 'J': { + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); + } else { + break; + } + } else { + Type = Context.getjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_jmp_buf; + return QualType(); + } else { + break; + } + } + } + +It is better to write it like this: + +.. code-block:: c++ + + case 'J': + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); + } + } else { + Type = Context.getjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_jmp_buf; + return QualType(); + } + } + break; + +Or better yet (in this case) as: + +.. code-block:: c++ + + case 'J': + if (Signed) + Type = Context.getsigjmp_bufType(); + else + Type = Context.getjmp_bufType(); + + if (Type.isNull()) { + Error = Signed ? ASTContext::GE_Missing_sigjmp_buf : + ASTContext::GE_Missing_jmp_buf; + return QualType(); + } + break; + +The idea is to reduce indentation and the amount of code you have to keep track +of when reading the code. + +Turn Predicate Loops into Predicate Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is very common to write small loops that just compute a boolean value. There +are a number of ways that people commonly write these, but an example of this +sort of thing is: + +.. code-block:: c++ + + bool FoundFoo = false; + for (unsigned i = 0, e = BarList.size(); i != e; ++i) + if (BarList[i]->isFoo()) { + FoundFoo = true; + break; + } + + if (FoundFoo) { + ... + } + +This sort of code is awkward to write, and is almost always a bad sign. Instead +of this sort of loop, we strongly prefer to use a predicate function (which may +be `static`_) that uses `early exits`_ to compute the predicate. We prefer the +code to be structured like this: + +.. 
code-block:: c++ + + /// ListContainsFoo - Return true if the specified list has an element that is + /// a foo. + static bool ListContainsFoo(const std::vector<Bar*> &List) { + for (unsigned i = 0, e = List.size(); i != e; ++i) + if (List[i]->isFoo()) + return true; + return false; + } + ... + + if (ListContainsFoo(BarList)) { + ... + } + +There are many reasons for doing this: it reduces indentation and factors out +code which can often be shared by other code that checks for the same predicate. +More importantly, it *forces you to pick a name* for the function, and forces +you to write a comment for it. In this silly example, this doesn't add much +value. However, if the condition is complex, this can make it a lot easier for +the reader to understand the code that queries for this predicate. Instead of +being faced with the in-line details of how we check to see if the BarList +contains a foo, we can trust the function name and continue reading with better +locality. + +The Low-Level Issues +-------------------- + +Name Types, Functions, Variables, and Enumerators Properly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Poorly-chosen names can mislead the reader and cause bugs. We cannot stress +enough how important it is to use *descriptive* names. Pick names that match +the semantics and role of the underlying entities, within reason. Avoid +abbreviations unless they are well known. After picking a good name, make sure +to use consistent capitalization for the name, as inconsistency requires clients +to either memorize the APIs or to look it up to find the exact spelling. + +In general, names should be in camel case (e.g. ``TextFileReader`` and +``isLValue()``). Different kinds of declarations have different rules: + +* **Type names** (including classes, structs, enums, typedefs, etc) should be + nouns and start with an upper-case letter (e.g. ``TextFileReader``). + +* **Variable names** should be nouns (as they represent state). 
The name should + be camel case, and start with an upper case letter (e.g. ``Leader`` or + ``Boats``). + +* **Function names** should be verb phrases (as they represent actions), and + command-like function should be imperative. The name should be camel case, + and start with a lower case letter (e.g. ``openFile()`` or ``isFoo()``). + +* **Enum declarations** (e.g. ``enum Foo {...}``) are types, so they should + follow the naming conventions for types. A common use for enums is as a + discriminator for a union, or an indicator of a subclass. When an enum is + used for something like this, it should have a ``Kind`` suffix + (e.g. ``ValueKind``). + +* **Enumerators** (e.g. ``enum { Foo, Bar }``) and **public member variables** + should start with an upper-case letter, just like types. Unless the + enumerators are defined in their own small namespace or inside a class, + enumerators should have a prefix corresponding to the enum declaration name. + For example, ``enum ValueKind { ... };`` may contain enumerators like + ``VK_Argument``, ``VK_BasicBlock``, etc. Enumerators that are just + convenience constants are exempt from the requirement for a prefix. For + instance: + + .. code-block:: c++ + + enum { + MaxSize = 42, + Density = 12 + }; + +As an exception, classes that mimic STL classes can have member names in STL's +style of lower-case words separated by underscores (e.g. ``begin()``, +``push_back()``, and ``empty()``). + +Here are some examples of good and bad names: + +.. code-block:: c++ + + class VehicleMaker { + ... + Factory<Tire> F; // Bad -- abbreviation and non-descriptive. + Factory<Tire> Factory; // Better. + Factory<Tire> TireFactory; // Even better -- if VehicleMaker has more than one + // kind of factories. + }; + + Vehicle MakeVehicle(VehicleType Type) { + VehicleMaker M; // Might be OK if having a short life-span. + Tire tmp1 = M.makeTire(); // Bad -- 'tmp1' provides no information. + Light headlight = M.makeLight("head"); // Good -- descriptive. 
+ ... + } + +Assert Liberally +^^^^^^^^^^^^^^^^ + +Use the "``assert``" macro to its fullest. Check all of your preconditions and +assumptions, you never know when a bug (not necessarily even yours) might be +caught early by an assertion, which reduces debugging time dramatically. The +"``<cassert>``" header file is probably already included by the header files you +are using, so it doesn't cost anything to use it. + +To further assist with debugging, make sure to put some kind of error message in +the assertion statement, which is printed if the assertion is tripped. This +helps the poor debugger make sense of why an assertion is being made and +enforced, and hopefully what to do about it. Here is one complete example: + +.. code-block:: c++ + + inline Value *getOperand(unsigned i) { + assert(i < Operands.size() && "getOperand() out of range!"); + return Operands[i]; + } + +Here are more examples: + +.. code-block:: c++ + + assert(Ty->isPointerType() && "Can't allocate a non pointer type!"); + + assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); + + assert(idx < getNumSuccessors() && "Successor # out of range!"); + + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + + assert(isa<PHINode>(Succ->front()) && "Only works on PHId BBs!"); + +You get the idea. + +Please be aware that, when adding assert statements, not all compilers are aware +of the semantics of the assert. In some places, asserts are used to indicate a +piece of code that should not be reached. These are typically of the form: + +.. code-block:: c++ + + assert(0 && "Some helpful error message"); + +When used in a function that returns a value, they should be followed with a +return statement and a comment indicating that this line is never reached. This +will prevent a compiler which is unable to deduce that the assert statement +never returns from generating a warning. + +.. 
code-block:: c++ + + assert(0 && "Some helpful error message"); + return 0; + +Another issue is that values used only by assertions will produce an "unused +value" warning when assertions are disabled. For example, this code will warn: + +.. code-block:: c++ + + unsigned Size = V.size(); + assert(Size > 42 && "Vector smaller than it should be"); + + bool NewToSet = Myset.insert(Value); + assert(NewToSet && "The value shouldn't be in the set yet"); + +These are two interesting different cases. In the first case, the call to +``V.size()`` is only useful for the assert, and we don't want it executed when +assertions are disabled. Code like this should move the call into the assert +itself. In the second case, the side effects of the call must happen whether +the assert is enabled or not. In this case, the value should be cast to void to +disable the warning. To be specific, it is preferred to write the code like +this: + +.. code-block:: c++ + + assert(V.size() > 42 && "Vector smaller than it should be"); + + bool NewToSet = Myset.insert(Value); (void)NewToSet; + assert(NewToSet && "The value shouldn't be in the set yet"); + +Do Not Use ``using namespace std`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In LLVM, we prefer to explicitly prefix all identifiers from the standard +namespace with an "``std::``" prefix, rather than rely on "``using namespace +std;``". + +In header files, adding a ``'using namespace XXX'`` directive pollutes the +namespace of any source file that ``#include``\s the header. This is clearly a +bad thing. + +In implementation files (e.g. ``.cpp`` files), the rule is more of a stylistic +rule, but is still important. Basically, using explicit namespace prefixes +makes the code **clearer**, because it is immediately obvious what facilities +are being used and where they are coming from. And **more portable**, because +namespace clashes cannot occur between LLVM code and other namespaces. 
The +portability rule is important because different standard library implementations +expose different symbols (potentially ones they shouldn't), and future revisions +to the C++ standard will add more symbols to the ``std`` namespace. As such, we +never use ``'using namespace std;'`` in LLVM. + +The exception to the general rule (i.e. it's not an exception for the ``std`` +namespace) is for implementation files. For example, all of the code in the +LLVM project implements code that lives in the 'llvm' namespace. As such, it is +ok, and actually clearer, for the ``.cpp`` files to have a ``'using namespace +llvm;'`` directive at the top, after the ``#include``\s. This reduces +indentation in the body of the file for source editors that indent based on +braces, and keeps the conceptual context cleaner. The general form of this rule +is that any ``.cpp`` file that implements code in any namespace may use that +namespace (and its parents'), but should not use any others. + +Provide a Virtual Method Anchor for Classes in Headers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If a class is defined in a header file and has a vtable (either it has virtual +methods or it derives from classes with virtual methods), it must always have at +least one out-of-line virtual method in the class. Without this, the compiler +will copy the vtable and RTTI into every ``.o`` file that ``#include``\s the +header, bloating ``.o`` file sizes and increasing link times. + +Don't evaluate ``end()`` every time through a loop +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Because C++ doesn't have a standard "``foreach``" loop (though it can be +emulated with macros and may be coming in C++'0x) we end up writing a lot of +loops that manually iterate from begin to end on a variety of containers or +through other data structures. One common mistake is to write a loop in this +style: + +.. code-block:: c++ + + BasicBlock *BB = ... 
+ for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + ... use I ... + +The problem with this construct is that it evaluates "``BB->end()``" every time +through the loop. Instead of writing the loop like this, we strongly prefer +loops to be written so that they evaluate it once before the loop starts. A +convenient way to do this is like so: + +.. code-block:: c++ + + BasicBlock *BB = ... + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + ... use I ... + +The observant may quickly point out that these two loops may have different +semantics: if the container (a basic block in this case) is being mutated, then +"``BB->end()``" may change its value every time through the loop and the second +loop may not in fact be correct. If you actually do depend on this behavior, +please write the loop in the first form and add a comment indicating that you +did it intentionally. + +Why do we prefer the second form (when correct)? Writing the loop in the first +form has two problems. First it may be less efficient than evaluating it at the +start of the loop. In this case, the cost is probably minor --- a few extra +loads every time through the loop. However, if the base expression is more +complex, then the cost can rise quickly. I've seen loops where the end +expression was actually something like: "``SomeMap[x]->end()``" and map lookups +really aren't cheap. By writing it in the second form consistently, you +eliminate the issue entirely and don't even have to think about it. + +The second (even bigger) issue is that writing the loop in the first form hints +to the reader that the loop is mutating the container (a fact that a comment +would handily confirm!). If you write the loop in the second form, it is +immediately obvious without even looking at the body of the loop that the +container isn't being modified, which makes it easier to read the code and +understand what it does. 
+ +While the second form of the loop is a few extra keystrokes, we do strongly +prefer it. + +``#include <iostream>`` is Forbidden +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The use of ``#include <iostream>`` in library files is hereby **forbidden**, +because many common implementations transparently inject a `static constructor`_ +into every translation unit that includes it. + +Note that using the other stream headers (``<sstream>`` for example) is not +problematic in this regard --- just ``<iostream>``. However, ``raw_ostream`` +provides various APIs that are better performing for almost every use than +``std::ostream`` style APIs. + +.. note:: + + New code should always use `raw_ostream`_ for writing, or the + ``llvm::MemoryBuffer`` API for reading files. + +.. _raw_ostream: + +Use ``raw_ostream`` +^^^^^^^^^^^^^^^^^^^ + +LLVM includes a lightweight, simple, and efficient stream implementation in +``llvm/Support/raw_ostream.h``, which provides all of the common features of +``std::ostream``. All new code should use ``raw_ostream`` instead of +``ostream``. + +Unlike ``std::ostream``, ``raw_ostream`` is not a template and can be forward +declared as ``class raw_ostream``. Public headers should generally not include +the ``raw_ostream`` header, but use forward declarations and constant references +to ``raw_ostream`` instances. + +Avoid ``std::endl`` +^^^^^^^^^^^^^^^^^^^ + +The ``std::endl`` modifier, when used with ``iostreams`` outputs a newline to +the output stream specified. In addition to doing this, however, it also +flushes the output stream. In other words, these are equivalent: + +.. code-block:: c++ + + std::cout << std::endl; + std::cout << '\n' << std::flush; + +Most of the time, you probably have no reason to flush the output stream, so +it's better to use a literal ``'\n'``. + +Microscopic Details +------------------- + +This section describes preferred low-level formatting guidelines along with +reasoning on why we prefer them. 
+ +Spaces Before Parentheses +^^^^^^^^^^^^^^^^^^^^^^^^^ + +We prefer to put a space before an open parenthesis only in control flow +statements, but not in normal function call expressions and function-like +macros. For example, this is good: + +.. code-block:: c++ + + if (x) ... + for (i = 0; i != 100; ++i) ... + while (llvm_rocks) ... + + somefunc(42); + assert(3 != 4 && "laws of math are failing me"); + + a = foo(42, 92) + bar(x); + +and this is bad: + +.. code-block:: c++ + + if(x) ... + for(i = 0; i != 100; ++i) ... + while(llvm_rocks) ... + + somefunc (42); + assert (3 != 4 && "laws of math are failing me"); + + a = foo (42, 92) + bar (x); + +The reason for doing this is not completely arbitrary. This style makes control +flow operators stand out more, and makes expressions flow better. The function +call operator binds very tightly as a postfix operator. Putting a space after a +function name (as in the last example) makes it appear that the code might bind +the arguments of the left-hand-side of a binary operator with the argument list +of a function and the name of the right side. More specifically, it is easy to +misread the "``a``" example as: + +.. code-block:: c++ + + a = foo ((42, 92) + bar) (x); + +when skimming through the code. By avoiding a space in a function, we avoid +this misinterpretation. + +Prefer Preincrement +^^^^^^^^^^^^^^^^^^^ + +Hard fast rule: Preincrement (``++X``) may be no slower than postincrement +(``X++``) and could very well be a lot faster than it. Use preincrementation +whenever possible. + +The semantics of postincrement include making a copy of the value being +incremented, returning it, and then preincrementing the "work value". For +primitive types, this isn't a big deal. But for iterators, it can be a huge +issue (for example, some iterators contains stack and set objects in them... +copying an iterator could invoke the copy ctor's of these as well). 
In general, +get in the habit of always using preincrement, and you won't have a problem. + + +Namespace Indentation +^^^^^^^^^^^^^^^^^^^^^ + +In general, we strive to reduce indentation wherever possible. This is useful +because we want code to `fit into 80 columns`_ without wrapping horribly, but +also because it makes it easier to understand the code. Namespaces are a funny +thing: they are often large, and we often desire to put lots of stuff into them +(so they can be large). Other times they are tiny, because they just hold an +enum or something similar. In order to balance this, we use different +approaches for small versus large namespaces. + +If a namespace definition is small and *easily* fits on a screen (say, less than +35 lines of code), then you should indent its body. Here's an example: + +.. code-block:: c++ + + namespace llvm { + namespace X86 { + /// RelocationType - An enum for the x86 relocation codes. Note that + /// the terminology here doesn't follow x86 convention - word means + /// 32-bit and dword means 64-bit. + enum RelocationType { + /// reloc_pcrel_word - PC relative relocation, add the relocated value to + /// the value already in memory, after we adjust it for where the PC is. + reloc_pcrel_word = 0, + + /// reloc_picrel_word - PIC base relative relocation, add the relocated + /// value to the value already in memory, after we adjust it for where the + /// PIC base is. + reloc_picrel_word = 1, + + /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just + /// add the relocated value to the value already in memory. + reloc_absolute_word = 2, + reloc_absolute_dword = 3 + }; + } + } + +Since the body is small, indenting adds value because it makes it very clear +where the namespace starts and ends, and it is easy to take the whole thing in +in one "gulp" when reading the code. 
If the blob of code in the namespace is +larger (as it typically is in a header in the ``llvm`` or ``clang`` namespaces), +do not indent the code, and add a comment indicating what namespace is being +closed. For example: + +.. code-block:: c++ + + namespace llvm { + namespace knowledge { + + /// Grokable - This class represents things that Smith can have an intimate + /// understanding of and contains the data associated with it. + class Grokable { + ... + public: + explicit Grokable() { ... } + virtual ~Grokable() = 0; + + ... + + }; + + } // end namespace knowledge + } // end namespace llvm + +Because the class is large, we don't expect that the reader can easily +understand the entire concept in a glance, and the end of the file (where the +namespaces end) may be a long ways away from the place they open. As such, +indenting the contents of the namespace doesn't add any value, and detracts from +the readability of the class. In these cases it is best to *not* indent the +contents of the namespace. + +.. _static: + +Anonymous Namespaces +^^^^^^^^^^^^^^^^^^^^ + +After talking about namespaces in general, you may be wondering about anonymous +namespaces in particular. Anonymous namespaces are a great language feature +that tells the C++ compiler that the contents of the namespace are only visible +within the current translation unit, allowing more aggressive optimization and +eliminating the possibility of symbol name collisions. Anonymous namespaces are +to C++ as "static" is to C functions and global variables. While "``static``" +is available in C++, anonymous namespaces are more general: they can make entire +classes private to a file. 
+ +The problem with anonymous namespaces is that they naturally want to encourage +indentation of their body, and they reduce locality of reference: if you see a +random function definition in a C++ file, it is easy to see if it is marked +static, but seeing if it is in an anonymous namespace requires scanning a big +chunk of the file. + +Because of this, we have a simple guideline: make anonymous namespaces as small +as possible, and only use them for class declarations. For example, this is +good: + +.. code-block:: c++ + + namespace { + class StringSort { + ... + public: + StringSort(...) + bool operator<(const char *RHS) const; + }; + } // end anonymous namespace + + static void Helper() { + ... + } + + bool StringSort::operator<(const char *RHS) const { + ... + } + +This is bad: + +.. code-block:: c++ + + namespace { + class StringSort { + ... + public: + StringSort(...) + bool operator<(const char *RHS) const; + }; + + void Helper() { + ... + } + + bool StringSort::operator<(const char *RHS) const { + ... + } + + } // end anonymous namespace + +This is bad specifically because if you're looking at "``Helper``" in the middle +of a large C++ file, that you have no immediate way to tell if it is local to +the file. When it is marked static explicitly, this is immediately obvious. +Also, there is no reason to enclose the definition of "``operator<``" in the +namespace just because it was declared there. + +See Also +======== + +A lot of these comments and recommendations have been culled for other sources. +Two particularly important books for our work are: + +#. `Effective C++ + <http://www.amazon.com/Effective-Specific-Addison-Wesley-Professional-Computing/dp/0321334876>`_ + by Scott Meyers. Also interesting and useful are "More Effective C++" and + "Effective STL" by the same author. + +#. 
`Large-Scale C++ Software Design + <http://www.amazon.com/Large-Scale-Software-Design-John-Lakos/dp/0201633620/ref=sr_1_1>`_ + by John Lakos + +If you get some free time, and you haven't read them: do so, you might learn +something. diff --git a/docs/CommandGuide/llvm-as.rst b/docs/CommandGuide/llvm-as.rst index 749bc7e851..1b499bbe97 100644 --- a/docs/CommandGuide/llvm-as.rst +++ b/docs/CommandGuide/llvm-as.rst @@ -1,18 +1,14 @@ llvm-as - LLVM assembler ======================== - SYNOPSIS -------- - **llvm-as** [*options*] [*filename*] - DESCRIPTION ----------- - **llvm-as** is the LLVM assembler. It reads a file containing human-readable LLVM assembly language, translates it to LLVM bitcode, and writes the result into a file or to standard output. @@ -24,66 +20,37 @@ If an output file is not specified with the **-o** option, then **llvm-as** sends its output to a file or standard output by following these rules: +* If the input is standard input, then the output is standard output. -\* - - If the input is standard input, then the output is standard output. - - - -\* - - If the input is a file that ends with ``.ll``, then the output file is of - the same name, except that the suffix is changed to ``.bc``. - - - -\* - - If the input is a file that does not end with the ``.ll`` suffix, then the - output file has the same name as the input file, except that the ``.bc`` - suffix is appended. - - +* If the input is a file that ends with ``.ll``, then the output file is of the + same name, except that the suffix is changed to ``.bc``. +* If the input is a file that does not end with the ``.ll`` suffix, then the + output file has the same name as the input file, except that the ``.bc`` + suffix is appended. OPTIONS ------- - - **-f** - Enable binary output on terminals. Normally, **llvm-as** will refuse to write raw bitcode output if the output stream is a terminal. With this option, **llvm-as** will write raw bitcode regardless of the output device. 
- - **-help** - Print a summary of command line options. - - **-o** *filename* - Specify the output file name. If *filename* is ``-``, then **llvm-as** sends its output to standard output. - - - EXIT STATUS ----------- - -If **llvm-as** succeeds, it will exit with 0. Otherwise, if an error -occurs, it will exit with a non-zero value. - +If **llvm-as** succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value. SEE ALSO -------- - llvm-dis|llvm-dis, gccas|gccas diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html deleted file mode 100644 index bf52ad289f..0000000000 --- a/docs/DeveloperPolicy.html +++ /dev/null @@ -1,642 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Developer Policy</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1>LLVM Developer Policy</h1> -<ol> - <li><a href="#introduction">Introduction</a></li> - <li><a href="#policies">Developer Policies</a> - <ol> - <li><a href="#informed">Stay Informed</a></li> - <li><a href="#patches">Making a Patch</a></li> - <li><a href="#reviews">Code Reviews</a></li> - <li><a href="#owners">Code Owners</a></li> - <li><a href="#testcases">Test Cases</a></li> - <li><a href="#quality">Quality</a></li> - <li><a href="#commitaccess">Obtaining Commit Access</a></li> - <li><a href="#newwork">Making a Major Change</a></li> - <li><a href="#incremental">Incremental Development</a></li> - <li><a href="#attribution">Attribution of Changes</a></li> - </ol></li> - <li><a href="#clp">Copyright, License, and Patents</a> - <ol> - <li><a href="#copyright">Copyright</a></li> - <li><a href="#license">License</a></li> - <li><a href="#patents">Patents</a></li> - </ol></li> -</ol> -<div class="doc_author">Written by the LLVM Oversight Team</div> - 
-<!--=========================================================================--> -<h2><a name="introduction">Introduction</a></h2> -<!--=========================================================================--> -<div> -<p>This document contains the LLVM Developer Policy which defines the project's - policy towards developers and their contributions. The intent of this policy - is to eliminate miscommunication, rework, and confusion that might arise from - the distributed nature of LLVM's development. By stating the policy in clear - terms, we hope each developer can know ahead of time what to expect when - making LLVM contributions. This policy covers all llvm.org subprojects, - including Clang, LLDB, libc++, etc.</p> -<p>This policy is also designed to accomplish the following objectives:</p> - -<ol> - <li>Attract both users and developers to the LLVM project.</li> - - <li>Make life as simple and easy for contributors as possible.</li> - - <li>Keep the top of Subversion trees as stable as possible.</li> - - <li>Establish awareness of the project's <a href="#clp">copyright, - license, and patent policies</a> with contributors to the project.</li> -</ol> - -<p>This policy is aimed at frequent contributors to LLVM. People interested in - contributing one-off patches can do so in an informal way by sending them to - the - <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits - mailing list</a> and engaging another developer to see it through the - process.</p> -</div> - -<!--=========================================================================--> -<h2><a name="policies">Developer Policies</a></h2> -<!--=========================================================================--> -<div> -<p>This section contains policies that pertain to frequent LLVM developers. 
We - always welcome <a href="#patches">one-off patches</a> from people who do not - routinely contribute to LLVM, but we expect more from frequent contributors - to keep the system as efficient as possible for everyone. Frequent LLVM - contributors are expected to meet the following requirements in order for - LLVM to maintain a high standard of quality.<p> - -<!-- _______________________________________________________________________ --> -<h3><a name="informed">Stay Informed</a></h3> -<div> -<p>Developers should stay informed by reading at least the "dev" mailing list - for the projects you are interested in, such as - <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a> for - LLVM, <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev">cfe-dev</a> - for Clang, or <a - href="http://lists.cs.uiuc.edu/mailman/listinfo/lldb-dev">lldb-dev</a> - for LLDB. If you are doing anything more than just casual work on LLVM, it - is suggested that you also subscribe to the "commits" mailing list for the - subproject you're interested in, such as - <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits</a>, - <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits">cfe-commits</a>, - or <a href="http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits">lldb-commits</a>. - Reading the "commits" list and paying attention to changes being made by - others is a good way to see what other people are interested in and watching - the flow of the project as a whole.</p> - -<p>We recommend that active developers register an email account with - <a href="http://llvm.org/bugs/">LLVM Bugzilla</a> and preferably subscribe to - the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs">llvm-bugs</a> - email list to keep track of bugs and enhancements occurring in LLVM. 
We - really appreciate people who are proactive at catching incoming bugs in their - components and dealing with them promptly.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="patches">Making a Patch</a></h3> - -<div> -<p>When making a patch for review, the goal is to make it as easy for the - reviewer to read it as possible. As such, we recommend that you:</p> - -<ol> - <li>Make your patch against the Subversion trunk, not a branch, and not an old - version of LLVM. This makes it easy to apply the patch. For information - on how to check out SVN trunk, please see the <a - href="GettingStarted.html#checkout">Getting Started Guide</a>.</li> - - <li>Similarly, patches should be submitted soon after they are generated. Old - patches may not apply correctly if the underlying code changes between the - time the patch was created and the time it is applied.</li> - - <li>Patches should be made with <tt>svn diff</tt>, or similar. If you use - a different tool, make sure it uses the <tt>diff -u</tt> format and - that it doesn't contain clutter which makes it hard to read.</li> - - <li>If you are modifying generated files, such as the top-level - <tt>configure</tt> script, please separate out those changes into - a separate patch from the rest of your changes.</li> -</ol> - -<p>When sending a patch to a mailing list, it is a good idea to send it as an - <em>attachment</em> to the message, not embedded into the text of the - message. This ensures that your mailer will not mangle the patch when it - sends it (e.g. by making whitespace changes or by wrapping lines).</p> - -<p><em>For Thunderbird users:</em> Before submitting a patch, please open - <em>Preferences → Advanced → General → Config Editor</em>, - find the key <tt>mail.content_disposition_type</tt>, and set its value to - <tt>1</tt>. 
Without this setting, Thunderbird sends your attachment using - <tt>Content-Disposition: inline</tt> rather than <tt>Content-Disposition: - attachment</tt>. Apple Mail gamely displays such a file inline, making it - difficult to work with for reviewers using that program.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="reviews">Code Reviews</a></h3> -<div> -<p>LLVM has a code review policy. Code review is one way to increase the quality - of software. We generally follow these policies:</p> - -<ol> - <li>All developers are required to have significant changes reviewed before - they are committed to the repository.</li> - - <li>Code reviews are conducted by email, usually on the llvm-commits - list.</li> - - <li>Code can be reviewed either before it is committed or after. We expect - major changes to be reviewed before being committed, but smaller changes - (or changes where the developer owns the component) can be reviewed after - commit.</li> - - <li>The developer responsible for a code change is also responsible for making - all necessary review-related changes.</li> - - <li>Code review can be an iterative process, which continues until the patch - is ready to be committed.</li> -</ol> - -<p>Developers should participate in code reviews as both reviewers and - reviewees. If someone is kind enough to review your code, you should return - the favor for someone else. Note that anyone is welcome to review and give - feedback on a patch, but only people with Subversion write access can approve - it.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="owners">Code Owners</a></h3> -<div> - -<p>The LLVM Project relies on two features of its process to maintain rapid - development in addition to the high quality of its source base: the - combination of code review plus post-commit review for trusted maintainers. 
- Having both is a great way for the project to take advantage of the fact that - most people do the right thing most of the time, and only commit patches - without pre-commit review when they are confident they are right.</p> - -<p>The trick to this is that the project has to guarantee that all patches that - are committed are reviewed after they go in: you don't want everyone to - assume someone else will review it, allowing the patch to go unreviewed. To - solve this problem, we have a notion of an 'owner' for a piece of the code. - The sole responsibility of a code owner is to ensure that a commit to their - area of the code is appropriately reviewed, either by themself or by someone - else. The current code owners are:</p> - -<ol> - <li><b>Evan Cheng</b>: Code generator and all targets.</li> - - <li><b>Greg Clayton</b>: LLDB.</li> - - <li><b>Doug Gregor</b>: Clang Frontend Libraries.</li> - - <li><b>Howard Hinnant</b>: libc++.</li> - - <li><b>Anton Korobeynikov</b>: Exception handling, debug information, and - Windows codegen.</li> - - <li><b>Ted Kremenek</b>: Clang Static Analyzer.</li> - - <li><b>Chris Lattner</b>: Everything not covered by someone else.</li> - - <li><b>John McCall</b>: Clang LLVM IR generation.</li> - - <li><b>Jakob Olesen</b>: Register allocators and TableGen.</li> - - <li><b>Duncan Sands</b>: dragonegg and llvm-gcc 4.2.</li> - - <li><b>Peter Collingbourne</b>: libclc.</li> - - <li><b>Tobias Grosser</b>: polly.</li> -</ol> - -<p>Note that code ownership is completely different than reviewers: anyone can - review a piece of code, and we welcome code review from anyone who is - interested. Code owners are the "last line of defense" to guarantee that all - patches that are committed are actually reviewed.</p> - -<p>Being a code owner is a somewhat unglamorous position, but it is incredibly - important for the ongoing success of the project. 
Because people get busy, - interests change, and unexpected things happen, code ownership is purely - opt-in, and anyone can choose to resign their "title" at any time. For now, - we do not have an official policy on how one gets elected to be a code - owner.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="testcases">Test Cases</a></h3> -<div> -<p>Developers are required to create test cases for any bugs fixed and any new - features added. Some tips for getting your testcase approved:</p> - -<ol> - <li>All feature and regression test cases are added to the - <tt>llvm/test</tt> directory. The appropriate sub-directory should be - selected (see the <a href="TestingGuide.html">Testing Guide</a> for - details).</li> - - <li>Test cases should be written in <a href="LangRef.html">LLVM assembly - language</a> unless the feature or regression being tested requires - another language (e.g. the bug being fixed or feature being implemented is - in the llvm-gcc C++ front-end, in which case it must be written in - C++).</li> - - <li>Test cases, especially for regressions, should be reduced as much as - possible, by <a href="Bugpoint.html">bugpoint</a> or manually. It is - unacceptable to place an entire failing program into <tt>llvm/test</tt> as - this creates a <i>time-to-test</i> burden on all developers. Please keep - them short.</li> -</ol> - -<p>Note that llvm/test and clang/test are designed for regression and small - feature tests only. More extensive test cases (e.g., entire applications, - benchmarks, etc) - should be added to the <tt>llvm-test</tt> test suite. 
The llvm-test suite is - for coverage (correctness, performance, etc) testing, not feature or - regression testing.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="quality">Quality</a></h3> -<div> -<p>The minimum quality standards that any change must satisfy before being - committed to the main development branch are:</p> - -<ol> - <li>Code must adhere to the <a href="CodingStandards.html">LLVM Coding - Standards</a>.</li> - - <li>Code must compile cleanly (no errors, no warnings) on at least one - platform.</li> - - <li>Bug fixes and new features should <a href="#testcases">include a - testcase</a> so we know if the fix/feature ever regresses in the - future.</li> - - <li>Code must pass the <tt>llvm/test</tt> test suite.</li> - - <li>The code must not cause regressions on a reasonable subset of llvm-test, - where "reasonable" depends on the contributor's judgement and the scope of - the change (more invasive changes require more testing). A reasonable - subset might be something like - "<tt>llvm-test/MultiSource/Benchmarks</tt>".</li> -</ol> - -<p>Additionally, the committer is responsible for addressing any problems found - in the future that the change is responsible for. 
For example:</p> - -<ul> - <li>The code should compile cleanly on all supported platforms.</li> - - <li>The changes should not cause any correctness regressions in the - <tt>llvm-test</tt> suite and must not cause any major performance - regressions.</li> - - <li>The change set should not cause performance or correctness regressions for - the LLVM tools.</li> - - <li>The changes should not cause performance or correctness regressions in - code compiled by LLVM on all applicable targets.</li> - - <li>You are expected to address any <a href="http://llvm.org/bugs/">bugzilla - bugs</a> that result from your change.</li> -</ul> - -<p>We prefer for this to be handled before submission but understand that it - isn't possible to test all of this for every submission. Our build bots and - nightly testing infrastructure normally finds these problems. A good rule of - thumb is to check the nightly testers for regressions the day after your - change. Build bots will directly email you if a group of commits that - included yours caused a failure. You are expected to check the build bot - messages to see if they are your fault and, if so, fix the breakage.</p> - -<p>Commits that violate these quality standards (e.g. are very broken) may be - reverted. This is necessary when the change blocks other developers from - making progress. The developer is welcome to re-commit the change after the - problem has been fixed.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="commitaccess">Obtaining Commit Access</a></h3> -<div> - -<p>We grant commit access to contributors with a track record of submitting high - quality patches. If you would like commit access, please send an email to - <a href="mailto:sabre@nondot.org">Chris</a> with the following - information:</p> - -<ol> - <li>The user name you want to commit with, e.g. "hacker".</li> - - <li>The full name and email address you want message to llvm-commits to come - from, e.g. 
"J. Random Hacker <hacker@yoyodyne.com>".</li> - - <li>A "password hash" of the password you want to use, e.g. "2ACR96qjUqsyM". - Note that you don't ever tell us what your password is, you just give it - to us in an encrypted form. To get this, run "htpasswd" (a utility that - comes with apache) in crypt mode (often enabled with "-d"), or find a web - page that will do it for you.</li> -</ol> - -<p>Once you've been granted commit access, you should be able to check out an - LLVM tree with an SVN URL of "https://username@llvm.org/..." instead of the - normal anonymous URL of "http://llvm.org/...". The first time you commit - you'll have to type in your password. Note that you may get a warning from - SVN about an untrusted key, you can ignore this. To verify that your commit - access works, please do a test commit (e.g. change a comment or add a blank - line). Your first commit to a repository may require the autogenerated email - to be approved by a mailing list. This is normal, and will be done when - the mailing list owner has time.</p> - -<p>If you have recently been granted commit access, these policies apply:</p> - -<ol> - <li>You are granted <i>commit-after-approval</i> to all parts of LLVM. To get - approval, submit a <a href="#patches">patch</a> to - <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits</a>. - When approved you may commit it yourself.</li> - - <li>You are allowed to commit patches without approval which you think are - obvious. This is clearly a subjective decision — we simply expect - you to use good judgement. Examples include: fixing build breakage, - reverting obviously broken patches, documentation/comment changes, any - other minor changes.</li> - - <li>You are allowed to commit patches without approval to those portions of - LLVM that you have contributed or maintain (i.e., have been assigned - responsibility for), with the proviso that such commits must not break the - build. 
This is a "trust but verify" policy and commits of this nature are - reviewed after they are committed.</li> - - <li>Multiple violations of these policies or a single egregious violation may - cause commit access to be revoked.</li> -</ol> - -<p>In any case, your changes are still subject to <a href="#reviews">code - review</a> (either before or after they are committed, depending on the - nature of the change). You are encouraged to review other peoples' patches - as well, but you aren't required to.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="newwork">Making a Major Change</a></h3> -<div> -<p>When a developer begins a major new project with the aim of contributing it - back to LLVM, s/he should inform the community with an email to - the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a> - email list, to the extent possible. The reason for this is to: - -<ol> - <li>keep the community informed about future changes to LLVM, </li> - - <li>avoid duplication of effort by preventing multiple parties working on the - same thing and not knowing about it, and</li> - - <li>ensure that any technical issues around the proposed work are discussed - and resolved before any significant work is done.</li> -</ol> - -<p>The design of LLVM is carefully controlled to ensure that all the pieces fit - together well and are as consistent as possible. 
If you plan to make a major - change to the way LLVM works or want to add a major new extension, it is a - good idea to get consensus with the development community before you start - working on it.</p> - -<p>Once the design of the new feature is finalized, the work itself should be - done as a series of <a href="#incremental">incremental changes</a>, not as a - long-term development branch.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="incremental">Incremental Development</a></h3> -<div> -<p>In the LLVM project, we do all significant changes as a series of incremental - patches. We have a strong dislike for huge changes or long-term development - branches. Long-term development branches have a number of drawbacks:</p> - -<ol> - <li>Branches must have mainline merged into them periodically. If the branch - development and mainline development occur in the same pieces of code, - resolving merge conflicts can take a lot of time.</li> - - <li>Other people in the community tend to ignore work on branches.</li> - - <li>Huge changes (produced when a branch is merged back onto mainline) are - extremely difficult to <a href="#reviews">code review</a>.</li> - - <li>Branches are not routinely tested by our nightly tester - infrastructure.</li> - - <li>Changes developed as monolithic large changes often don't work until the - entire set of changes is done. Breaking it down into a set of smaller - changes increases the odds that any of the work will be committed to the - main repository.</li> -</ol> - -<p>To address these problems, LLVM uses an incremental development style and we - require contributors to follow this practice when making a large/invasive - change. Some tips:</p> - -<ul> - <li>Large/invasive changes usually have a number of secondary changes that are - required before the big change can be made (e.g. API cleanup, etc). 
These - sorts of changes can often be done before the major change is done, - independently of that work.</li> - - <li>The remaining inter-related work should be decomposed into unrelated sets - of changes if possible. Once this is done, define the first increment and - get consensus on what the end goal of the change is.</li> - - <li>Each change in the set can be stand alone (e.g. to fix a bug), or part of - a planned series of changes that works towards the development goal.</li> - - <li>Each change should be kept as small as possible. This simplifies your work - (into a logical progression), simplifies code review and reduces the - chance that you will get negative feedback on the change. Small increments - also facilitate the maintenance of a high quality code base.</li> - - <li>Often, an independent precursor to a big change is to add a new API and - slowly migrate clients to use the new API. Each change to use the new API - is often "obvious" and can be committed without review. Once the new API - is in place and used, it is much easier to replace the underlying - implementation of the API. This implementation change is logically - separate from the API change.</li> -</ul> - -<p>If you are interested in making a large change, and this scares you, please - make sure to first <a href="#newwork">discuss the change/gather consensus</a> - then ask about the best way to go about making the change.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="attribution">Attribution of Changes</a></h3> -<div> -<p>We believe in correct attribution of contributions to their contributors. - However, we do not want the source code to be littered with random - attributions "this code written by J. Random Hacker" (this is noisy and - distracting). In practice, the revision control system keeps a perfect - history of who changed what, and the CREDITS.txt file describes higher-level - contributions. 
If you commit a patch for someone else, please say "patch - contributed by J. Random Hacker!" in the commit message.</p> - -<p>Overall, please do not add contributor names to the source code.</p> -</div> - -</div> - -<!--=========================================================================--> -<h2> - <a name="clp">Copyright, License, and Patents</a> -</h2> -<!--=========================================================================--> - -<div> - -<div class="doc_notes"> -<p style="text-align:center;font-weight:bold">NOTE: This section deals with - legal matters but does not provide legal advice. We are not lawyers — - please seek legal counsel from an attorney.</p> -</div> - -<div> -<p>This section addresses the issues of copyright, license and patents for the - LLVM project. The copyright for the code is held by the individual - contributors of the code and the terms of its license to LLVM users and - developers is the - <a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of - Illinois/NCSA Open Source License</a> (with portions dual licensed under the - <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a>, - see below). As contributor to the LLVM project, you agree to allow any - contributions to the project to licensed under these terms.</p> - - -<!-- _______________________________________________________________________ --> -<h3><a name="copyright">Copyright</a></h3> -<div> - -<p>The LLVM project does not require copyright assignments, which means that the - copyright for the code in the project is held by its respective contributors - who have each agreed to release their contributed code under the terms of the - <a href="#license">LLVM License</a>.</p> - -<p>An implication of this is that the LLVM license is unlikely to ever change: - changing it would require tracking down all the contributors to LLVM and - getting them to agree that a license change is acceptable for their - contribution. 
Since there are no plans to change the license, this is not a - cause for concern.</p> - -<p>As a contributor to the project, this means that you (or your company) retain - ownership of the code you contribute, that it cannot be used in a way that - contradicts the license (which is a liberal BSD-style license), and that the - license for your contributions won't change without your approval in the - future.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="license">License</a></h3> -<div> -<p>We intend to keep LLVM perpetually open source and to use a liberal open - source license. <b>As a contributor to the project, you agree that any - contributions be licensed under the terms of the corresponding - subproject.</b> - All of the code in LLVM is available under the - <a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of - Illinois/NCSA Open Source License</a>, which boils down to this:</p> - -<ul> - <li>You can freely distribute LLVM.</li> - <li>You must retain the copyright notice if you redistribute LLVM.</li> - <li>Binaries derived from LLVM must reproduce the copyright notice (e.g. in an - included readme file).</li> - <li>You can't use our names to promote your LLVM derived products.</li> - <li>There's no warranty on LLVM at all.</li> -</ul> - -<p>We believe this fosters the widest adoption of LLVM because it <b>allows - commercial products to be derived from LLVM</b> with few restrictions and - without a requirement for making any derived works also open source (i.e. - LLVM's license is not a "copyleft" license like the GPL). 
We suggest that you - read the <a href="http://www.opensource.org/licenses/UoI-NCSA.php">License</a> - if further clarification is needed.</p> - -<p>In addition to the UIUC license, the runtime library components of LLVM - (<b>compiler_rt, libc++, and libclc</b>) are also licensed under the <a - href="http://www.opensource.org/licenses/mit-license.php">MIT license</a>, - which does not contain the binary redistribution clause. As a user of these - runtime libraries, it means that you can choose to use the code under either - license (and thus don't need the binary redistribution clause), and as a - contributor to the code that you agree that any contributions to these - libraries be licensed under both licenses. We feel that this is important - for runtime libraries, because they are implicitly linked into applications - and therefore should not subject those applications to the binary - redistribution clause. This also means that it is ok to move code from (e.g.) - libc++ to the LLVM core without concern, but that code cannot be moved from - the LLVM core to libc++ without the copyright owner's permission. -</p> - -<p>Note that the LLVM Project does distribute llvm-gcc and dragonegg, <b>which - are GPL.</b> - This means that anything "linked" into llvm-gcc must itself be compatible - with the GPL, and must be releasable under the terms of the GPL. This - implies that <b>any code linked into llvm-gcc and distributed to others may - be subject to the viral aspects of the GPL</b> (for example, a proprietary - code generator linked into llvm-gcc must be made available under the GPL). - This is not a problem for code already distributed under a more liberal - license (like the UIUC license), and GPL-containing subprojects are kept - in separate SVN repositories whose LICENSE.txt files specifically indicate - that they contain GPL code.</p> - -<p>We have no plans to change the license of LLVM. 
If you have questions or - comments about the license, please contact the - <a href="mailto:llvmdev@cs.uiuc.edu">LLVM Developer's Mailing List</a>.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="patents">Patents</a></h3> -<div> -<p>To the best of our knowledge, LLVM does not infringe on any patents (we have - actually removed code from LLVM in the past that was found to infringe). - Having code in LLVM that infringes on patents would violate an important goal - of the project by making it hard or impossible to reuse the code for - arbitrary purposes (including commercial use).</p> - -<p>When contributing code, we expect contributors to notify us of any potential - for patent-related trouble with their changes (including from third parties). - If you or your employer own - the rights to a patent and would like to contribute code to LLVM that relies - on it, we require that the copyright owner sign an agreement that allows any - other user of LLVM to freely use your patent. Please contact - the <a href="mailto:llvm-oversight@cs.uiuc.edu">oversight group</a> for more - details.</p> -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - Written by the - <a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst new file mode 100644 index 0000000000..96e4af3321 --- /dev/null +++ b/docs/DeveloperPolicy.rst @@ -0,0 +1,531 @@ +.. 
_developer_policy: + +===================== +LLVM Developer Policy +===================== + +.. contents:: + :local: + +Introduction +============ + +This document contains the LLVM Developer Policy which defines the project's +policy towards developers and their contributions. The intent of this policy is +to eliminate miscommunication, rework, and confusion that might arise from the +distributed nature of LLVM's development. By stating the policy in clear terms, +we hope each developer can know ahead of time what to expect when making LLVM +contributions. This policy covers all llvm.org subprojects, including Clang, +LLDB, libc++, etc. + +This policy is also designed to accomplish the following objectives: + +#. Attract both users and developers to the LLVM project. + +#. Make life as simple and easy for contributors as possible. + +#. Keep the top of Subversion trees as stable as possible. + +#. Establish awareness of the project's `copyright, license, and patent + policies`_ with contributors to the project. + +This policy is aimed at frequent contributors to LLVM. People interested in +contributing one-off patches can do so in an informal way by sending them to the +`llvm-commits mailing list +<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ and engaging another +developer to see it through the process. + +Developer Policies +================== + +This section contains policies that pertain to frequent LLVM developers. We +always welcome `one-off patches`_ from people who do not routinely contribute to +LLVM, but we expect more from frequent contributors to keep the system as +efficient as possible for everyone. Frequent LLVM contributors are expected to +meet the following requirements in order for LLVM to maintain a high standard of +quality. 
+ +Stay Informed +------------- + +Developers should stay informed by reading at least the "dev" mailing list for +the projects you are interested in, such as `llvmdev +<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ for LLVM, `cfe-dev +<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_ for Clang, or `lldb-dev +<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-dev>`_ for LLDB. If you are +doing anything more than just casual work on LLVM, it is suggested that you also +subscribe to the "commits" mailing list for the subproject you're interested in, +such as `llvm-commits +<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_, `cfe-commits +<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits>`_, or `lldb-commits +<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits>`_. Reading the +"commits" list and paying attention to changes being made by others is a good +way to see what other people are interested in and watching the flow of the +project as a whole. + +We recommend that active developers register an email account with `LLVM +Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs +<http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs>`_ email list to keep track +of bugs and enhancements occurring in LLVM. We really appreciate people who are +proactive at catching incoming bugs in their components and dealing with them +promptly. + +.. _patch: +.. _one-off patches: + +Making a Patch +-------------- + +When making a patch for review, the goal is to make it as easy for the reviewer +to read it as possible. As such, we recommend that you: + +#. Make your patch against the Subversion trunk, not a branch, and not an old + version of LLVM. This makes it easy to apply the patch. For information on + how to check out SVN trunk, please see the `Getting Started + Guide <GettingStarted.html#checkout>`_. + +#. Similarly, patches should be submitted soon after they are generated. 
Old + patches may not apply correctly if the underlying code changes between the + time the patch was created and the time it is applied. + +#. Patches should be made with ``svn diff``, or similar. If you use a + different tool, make sure it uses the ``diff -u`` format and that it + doesn't contain clutter which makes it hard to read. + +#. If you are modifying generated files, such as the top-level ``configure`` + script, please separate out those changes into a separate patch from the rest + of your changes. + +When sending a patch to a mailing list, it is a good idea to send it as an +*attachment* to the message, not embedded into the text of the message. This +ensures that your mailer will not mangle the patch when it sends it (e.g. by +making whitespace changes or by wrapping lines). + +*For Thunderbird users:* Before submitting a patch, please open *Preferences > +Advanced > General > Config Editor*, find the key +``mail.content_disposition_type``, and set its value to ``1``. Without this +setting, Thunderbird sends your attachment using ``Content-Disposition: inline`` +rather than ``Content-Disposition: attachment``. Apple Mail gamely displays such +a file inline, making it difficult to work with for reviewers using that +program. + +.. _code review: + +Code Reviews +------------ + +LLVM has a code review policy. Code review is one way to increase the quality of +software. We generally follow these policies: + +#. All developers are required to have significant changes reviewed before they + are committed to the repository. + +#. Code reviews are conducted by email, usually on the llvm-commits list. + +#. Code can be reviewed either before it is committed or after. We expect major + changes to be reviewed before being committed, but smaller changes (or + changes where the developer owns the component) can be reviewed after commit. + +#. The developer responsible for a code change is also responsible for making + all necessary review-related changes. + +#. 
Code review can be an iterative process, which continues until the patch is + ready to be committed. + +Developers should participate in code reviews as both reviewers and +reviewees. If someone is kind enough to review your code, you should return the +favor for someone else. Note that anyone is welcome to review and give feedback +on a patch, but only people with Subversion write access can approve it. + +Code Owners +----------- + +The LLVM Project relies on two features of its process to maintain rapid +development in addition to the high quality of its source base: the combination +of code review plus post-commit review for trusted maintainers. Having both is +a great way for the project to take advantage of the fact that most people do +the right thing most of the time, and only commit patches without pre-commit +review when they are confident they are right. + +The trick to this is that the project has to guarantee that all patches that are +committed are reviewed after they go in: you don't want everyone to assume +someone else will review it, allowing the patch to go unreviewed. To solve this +problem, we have a notion of an 'owner' for a piece of the code. The sole +responsibility of a code owner is to ensure that a commit to their area of the +code is appropriately reviewed, either by themself or by someone else. 
The +current code owners are: + +* **Evan Cheng**: Code generator and all targets + +* **Greg Clayton**: LLDB + +* **Doug Gregor**: Clang Frontend Libraries + +* **Howard Hinnant**: libc++ + +* **Anton Korobeynikov**: Exception handling, debug information, and Windows + codegen + +* **Ted Kremenek**: Clang Static Analyzer + +* **Chris Lattner**: Everything not covered by someone else + +* **John McCall**: Clang LLVM IR generation + +* **Jakob Olesen**: Register allocators and TableGen + +* **Duncan Sands**: dragonegg and llvm-gcc 4.2 + +* **Peter Collingbourne**: libclc + +* **Tobias Grosser**: polly + +Note that code ownership is completely different than reviewers: anyone can +review a piece of code, and we welcome code review from anyone who is +interested. Code owners are the "last line of defense" to guarantee that all +patches that are committed are actually reviewed. + +Being a code owner is a somewhat unglamorous position, but it is incredibly +important for the ongoing success of the project. Because people get busy, +interests change, and unexpected things happen, code ownership is purely opt-in, +and anyone can choose to resign their "title" at any time. For now, we do not +have an official policy on how one gets elected to be a code owner. + +.. _include a testcase: + +Test Cases +---------- + +Developers are required to create test cases for any bugs fixed and any new +features added. Some tips for getting your testcase approved: + +* All feature and regression test cases are added to the ``llvm/test`` + directory. The appropriate sub-directory should be selected (see the `Testing + Guide <TestingGuide.html>`_ for details). + +* Test cases should be written in `LLVM assembly language <LangRef.html>`_ + unless the feature or regression being tested requires another language + (e.g. the bug being fixed or feature being implemented is in the llvm-gcc C++ + front-end, in which case it must be written in C++). 
+ +* Test cases, especially for regressions, should be reduced as much as possible, + by `bugpoint <Bugpoint.html>`_ or manually. It is unacceptable to place an + entire failing program into ``llvm/test`` as this creates a *time-to-test* + burden on all developers. Please keep them short. + +Note that llvm/test and clang/test are designed for regression and small feature +tests only. More extensive test cases (e.g., entire applications, benchmarks, +etc) should be added to the ``llvm-test`` test suite. The llvm-test suite is +for coverage (correctness, performance, etc) testing, not feature or regression +testing. + +Quality +------- + +The minimum quality standards that any change must satisfy before being +committed to the main development branch are: + +#. Code must adhere to the `LLVM Coding Standards <CodingStandards.html>`_. + +#. Code must compile cleanly (no errors, no warnings) on at least one platform. + +#. Bug fixes and new features should `include a testcase`_ so we know if the + fix/feature ever regresses in the future. + +#. Code must pass the ``llvm/test`` test suite. + +#. The code must not cause regressions on a reasonable subset of llvm-test, + where "reasonable" depends on the contributor's judgement and the scope of + the change (more invasive changes require more testing). A reasonable subset + might be something like "``llvm-test/MultiSource/Benchmarks``". + +Additionally, the committer is responsible for addressing any problems found in +the future that the change is responsible for. For example: + +* The code should compile cleanly on all supported platforms. + +* The changes should not cause any correctness regressions in the ``llvm-test`` + suite and must not cause any major performance regressions. + +* The change set should not cause performance or correctness regressions for the + LLVM tools. + +* The changes should not cause performance or correctness regressions in code + compiled by LLVM on all applicable targets. 
+ +* You are expected to address any `Bugzilla bugs <http://llvm.org/bugs/>`_ that + result from your change. + +We prefer for this to be handled before submission but understand that it isn't +possible to test all of this for every submission. Our build bots and nightly +testing infrastructure normally finds these problems. A good rule of thumb is +to check the nightly testers for regressions the day after your change. Build +bots will directly email you if a group of commits that included yours caused a +failure. You are expected to check the build bot messages to see if they are +your fault and, if so, fix the breakage. + +Commits that violate these quality standards (e.g. are very broken) may be +reverted. This is necessary when the change blocks other developers from making +progress. The developer is welcome to re-commit the change after the problem has +been fixed. + +Obtaining Commit Access +----------------------- + +We grant commit access to contributors with a track record of submitting high +quality patches. If you would like commit access, please send an email to +`Chris <mailto:sabre@nondot.org>`_ with the following information: + +#. The user name you want to commit with, e.g. "hacker". + +#. The full name and email address you want messages to llvm-commits to come + from, e.g. "J. Random Hacker <hacker@yoyodyne.com>". + +#. A "password hash" of the password you want to use, e.g. "``2ACR96qjUqsyM``". + Note that you don't ever tell us what your password is, you just give it to + us in an encrypted form. To get this, run "``htpasswd``" (a utility that + comes with apache) in crypt mode (often enabled with "``-d``"), or find a web + page that will do it for you. + +Once you've been granted commit access, you should be able to check out an LLVM +tree with an SVN URL of "https://username@llvm.org/..." instead of the normal +anonymous URL of "http://llvm.org/...". The first time you commit you'll have +to type in your password. 
Note that you may get a warning from SVN about an +untrusted key, you can ignore this. To verify that your commit access works, +please do a test commit (e.g. change a comment or add a blank line). Your first +commit to a repository may require the autogenerated email to be approved by a +mailing list. This is normal, and will be done when the mailing list owner has +time. + +If you have recently been granted commit access, these policies apply: + +#. You are granted *commit-after-approval* to all parts of LLVM. To get + approval, submit a `patch`_ to `llvm-commits + <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved + you may commit it yourself. + +#. You are allowed to commit patches without approval which you think are + obvious. This is clearly a subjective decision --- we simply expect you to + use good judgement. Examples include: fixing build breakage, reverting + obviously broken patches, documentation/comment changes, any other minor + changes. + +#. You are allowed to commit patches without approval to those portions of LLVM + that you have contributed or maintain (i.e., have been assigned + responsibility for), with the proviso that such commits must not break the + build. This is a "trust but verify" policy and commits of this nature are + reviewed after they are committed. + +#. Multiple violations of these policies or a single egregious violation may + cause commit access to be revoked. + +In any case, your changes are still subject to `code review`_ (either before or +after they are committed, depending on the nature of the change). You are +encouraged to review other peoples' patches as well, but you aren't required +to. + +.. 
_discuss the change/gather consensus: + +Making a Major Change +--------------------- + +When a developer begins a major new project with the aim of contributing it back +to LLVM, s/he should inform the community with an email to the `llvmdev +<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ email list, to the extent +possible. The reason for this is to: + +#. keep the community informed about future changes to LLVM, + +#. avoid duplication of effort by preventing multiple parties working on the + same thing and not knowing about it, and + +#. ensure that any technical issues around the proposed work are discussed and + resolved before any significant work is done. + +The design of LLVM is carefully controlled to ensure that all the pieces fit +together well and are as consistent as possible. If you plan to make a major +change to the way LLVM works or want to add a major new extension, it is a good +idea to get consensus with the development community before you start working on +it. + +Once the design of the new feature is finalized, the work itself should be done +as a series of `incremental changes`_, not as a long-term development branch. + +.. _incremental changes: + +Incremental Development +----------------------- + +In the LLVM project, we do all significant changes as a series of incremental +patches. We have a strong dislike for huge changes or long-term development +branches. Long-term development branches have a number of drawbacks: + +#. Branches must have mainline merged into them periodically. If the branch + development and mainline development occur in the same pieces of code, + resolving merge conflicts can take a lot of time. + +#. Other people in the community tend to ignore work on branches. + +#. Huge changes (produced when a branch is merged back onto mainline) are + extremely difficult to `code review`_. + +#. Branches are not routinely tested by our nightly tester infrastructure. + +#. 
Changes developed as monolithic large changes often don't work until the + entire set of changes is done. Breaking it down into a set of smaller + changes increases the odds that any of the work will be committed to the main + repository. + +To address these problems, LLVM uses an incremental development style and we +require contributors to follow this practice when making a large/invasive +change. Some tips: + +* Large/invasive changes usually have a number of secondary changes that are + required before the big change can be made (e.g. API cleanup, etc). These + sorts of changes can often be done before the major change is done, + independently of that work. + +* The remaining inter-related work should be decomposed into unrelated sets of + changes if possible. Once this is done, define the first increment and get + consensus on what the end goal of the change is. + +* Each change in the set can be stand alone (e.g. to fix a bug), or part of a + planned series of changes that works towards the development goal. + +* Each change should be kept as small as possible. This simplifies your work + (into a logical progression), simplifies code review and reduces the chance + that you will get negative feedback on the change. Small increments also + facilitate the maintenance of a high quality code base. + +* Often, an independent precursor to a big change is to add a new API and slowly + migrate clients to use the new API. Each change to use the new API is often + "obvious" and can be committed without review. Once the new API is in place + and used, it is much easier to replace the underlying implementation of the + API. This implementation change is logically separate from the API + change. + +If you are interested in making a large change, and this scares you, please make +sure to first `discuss the change/gather consensus`_ then ask about the best way +to go about making the change. 
+ +Attribution of Changes +---------------------- + +We believe in correct attribution of contributions to their contributors. +However, we do not want the source code to be littered with random attributions +"this code written by J. Random Hacker" (this is noisy and distracting). In +practice, the revision control system keeps a perfect history of who changed +what, and the CREDITS.txt file describes higher-level contributions. If you +commit a patch for someone else, please say "patch contributed by J. Random +Hacker!" in the commit message. + +Overall, please do not add contributor names to the source code. + +.. _copyright, license, and patent policies: + +Copyright, License, and Patents +=============================== + +.. note:: + + This section deals with legal matters but does not provide legal advice. We + are not lawyers --- please seek legal counsel from an attorney. + +This section addresses the issues of copyright, license and patents for the LLVM +project. The copyright for the code is held by the individual contributors of +the code and the terms of its license to LLVM users and developers is the +`University of Illinois/NCSA Open Source License +<http://www.opensource.org/licenses/UoI-NCSA.php>`_ (with portions dual licensed +under the `MIT License <http://www.opensource.org/licenses/mit-license.php>`_, +see below). As a contributor to the LLVM project, you agree to allow any +contributions to the project to be licensed under these terms. + +Copyright +--------- + +The LLVM project does not require copyright assignments, which means that the +copyright for the code in the project is held by its respective contributors who +have each agreed to release their contributed code under the terms of the `LLVM +License`_. + +An implication of this is that the LLVM license is unlikely to ever change: +changing it would require tracking down all the contributors to LLVM and getting +them to agree that a license change is acceptable for their contribution. 
Since +there are no plans to change the license, this is not a cause for concern. + +As a contributor to the project, this means that you (or your company) retain +ownership of the code you contribute, that it cannot be used in a way that +contradicts the license (which is a liberal BSD-style license), and that the +license for your contributions won't change without your approval in the +future. + +.. _LLVM License: + +License +------- + +We intend to keep LLVM perpetually open source and to use a liberal open source +license. **As a contributor to the project, you agree that any contributions be +licensed under the terms of the corresponding subproject.** All of the code in +LLVM is available under the `University of Illinois/NCSA Open Source License +<http://www.opensource.org/licenses/UoI-NCSA.php>`_, which boils down to +this: + +* You can freely distribute LLVM. +* You must retain the copyright notice if you redistribute LLVM. +* Binaries derived from LLVM must reproduce the copyright notice (e.g. in an + included readme file). +* You can't use our names to promote your LLVM derived products. +* There's no warranty on LLVM at all. + +We believe this fosters the widest adoption of LLVM because it **allows +commercial products to be derived from LLVM** with few restrictions and without +a requirement for making any derived works also open source (i.e. LLVM's +license is not a "copyleft" license like the GPL). We suggest that you read the +`License <http://www.opensource.org/licenses/UoI-NCSA.php>`_ if further +clarification is needed. + +In addition to the UIUC license, the runtime library components of LLVM +(**compiler_rt, libc++, and libclc**) are also licensed under the `MIT License +<http://www.opensource.org/licenses/mit-license.php>`_, which does not contain +the binary redistribution clause. 
As a user of these runtime libraries, it +means that you can choose to use the code under either license (and thus don't +need the binary redistribution clause), and as a contributor to the code that +you agree that any contributions to these libraries be licensed under both +licenses. We feel that this is important for runtime libraries, because they +are implicitly linked into applications and therefore should not subject those +applications to the binary redistribution clause. This also means that it is ok +to move code from (e.g.) libc++ to the LLVM core without concern, but that code +cannot be moved from the LLVM core to libc++ without the copyright owner's +permission. + +Note that the LLVM Project does distribute llvm-gcc and dragonegg, **which are +GPL.** This means that anything "linked" into llvm-gcc must itself be compatible +with the GPL, and must be releasable under the terms of the GPL. This implies +that **any code linked into llvm-gcc and distributed to others may be subject to +the viral aspects of the GPL** (for example, a proprietary code generator linked +into llvm-gcc must be made available under the GPL). This is not a problem for +code already distributed under a more liberal license (like the UIUC license), +and GPL-containing subprojects are kept in separate SVN repositories whose +LICENSE.txt files specifically indicate that they contain GPL code. + +We have no plans to change the license of LLVM. If you have questions or +comments about the license, please contact the `LLVM Developer's Mailing +List <mailto:llvmdev@cs.uiuc.edu>`_. + +Patents +------- + +To the best of our knowledge, LLVM does not infringe on any patents (we have +actually removed code from LLVM in the past that was found to infringe). Having +code in LLVM that infringes on patents would violate an important goal of the +project by making it hard or impossible to reuse the code for arbitrary purposes +(including commercial use). 
+ +When contributing code, we expect contributors to notify us of any potential for +patent-related trouble with their changes (including from third parties). If +you or your employer own the rights to a patent and would like to contribute +code to LLVM that relies on it, we require that the copyright owner sign an +agreement that allows any other user of LLVM to freely use your patent. Please +contact the `oversight group <mailto:llvm-oversight@cs.uiuc.edu>`_ for more +details. diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html deleted file mode 100644 index ae0fa513cc..0000000000 --- a/docs/ExceptionHandling.html +++ /dev/null @@ -1,563 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <title>Exception Handling in LLVM</title> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <meta name="description" - content="Exception Handling in LLVM."> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> - -<body> - -<h1>Exception Handling in LLVM</h1> - -<table class="layout" style="width:100%"> - <tr class="layout"> - <td class="left"> -<ul> - <li><a href="#introduction">Introduction</a> - <ol> - <li><a href="#itanium">Itanium ABI Zero-cost Exception Handling</a></li> - <li><a href="#sjlj">Setjmp/Longjmp Exception Handling</a></li> - <li><a href="#overview">Overview</a></li> - </ol></li> - <li><a href="#codegen">LLVM Code Generation</a> - <ol> - <li><a href="#throw">Throw</a></li> - <li><a href="#try_catch">Try/Catch</a></li> - <li><a href="#cleanups">Cleanups</a></li> - <li><a href="#throw_filters">Throw Filters</a></li> - <li><a href="#restrictions">Restrictions</a></li> - </ol></li> - <li><a href="#format_common_intrinsics">Exception Handling Intrinsics</a> - <ol> - <li><a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a></li> - <li><a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a></li> - <li><a 
href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a></li> - <li><a href="#llvm_eh_sjlj_lsda"><tt>llvm.eh.sjlj.lsda</tt></a></li> - <li><a href="#llvm_eh_sjlj_callsite"><tt>llvm.eh.sjlj.callsite</tt></a></li> - </ol></li> - <li><a href="#asm">Asm Table Formats</a> - <ol> - <li><a href="#unwind_tables">Exception Handling Frame</a></li> - <li><a href="#exception_tables">Exception Tables</a></li> - </ol></li> -</ul> -</td> -</tr></table> - -<div class="doc_author"> - <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p> -</div> - - -<!-- *********************************************************************** --> -<h2><a name="introduction">Introduction</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>This document is the central repository for all information pertaining to - exception handling in LLVM. It describes the format that LLVM exception - handling information takes, which is useful for those interested in creating - front-ends or dealing directly with the information. Further, this document - provides specific examples of what exception handling information is used for - in C and C++.</p> - -<!-- ======================================================================= --> -<h3> - <a name="itanium">Itanium ABI Zero-cost Exception Handling</a> -</h3> - -<div> - -<p>Exception handling for most programming languages is designed to recover from - conditions that rarely occur during general use of an application. To that - end, exception handling should not interfere with the main flow of an - application's algorithm by performing checkpointing tasks, such as saving the - current pc or register state.</p> - -<p>The Itanium ABI Exception Handling Specification defines a methodology for - providing outlying data in the form of exception tables without inlining - speculative exception handling code in the flow of an application's main - algorithm. 
Thus, the specification is said to add "zero-cost" to the normal - execution of an application.</p> - -<p>A more complete description of the Itanium ABI exception handling runtime - support of can be found at - <a href="http://www.codesourcery.com/cxx-abi/abi-eh.html">Itanium C++ ABI: - Exception Handling</a>. A description of the exception frame format can be - found at - <a href="http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html">Exception - Frames</a>, with details of the DWARF 4 specification at - <a href="http://dwarfstd.org/Dwarf4Std.php">DWARF 4 Standard</a>. - A description for the C++ exception table formats can be found at - <a href="http://www.codesourcery.com/cxx-abi/exceptions.pdf">Exception Handling - Tables</a>.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="sjlj">Setjmp/Longjmp Exception Handling</a> -</h3> - -<div> - -<p>Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics - <a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a> and - <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a> to - handle control flow for exception handling.</p> - -<p>For each function which does exception processing — be - it <tt>try</tt>/<tt>catch</tt> blocks or cleanups — that function - registers itself on a global frame list. When exceptions are unwinding, the - runtime uses this list to identify which functions need processing.<p> - -<p>Landing pad selection is encoded in the call site entry of the function - context. 
The runtime returns to the function via - <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a>, where - a switch table transfers control to the appropriate landing pad based on - the index stored in the function context.</p> - -<p>In contrast to DWARF exception handling, which encodes exception regions - and frame information in out-of-line tables, SJLJ exception handling - builds and removes the unwind frame context at runtime. This results in - faster exception handling at the expense of slower execution when no - exceptions are thrown. As exceptions are, by their nature, intended for - uncommon code paths, DWARF exception handling is generally preferred to - SJLJ.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="overview">Overview</a> -</h3> - -<div> - -<p>When an exception is thrown in LLVM code, the runtime does its best to find a - handler suited to processing the circumstance.</p> - -<p>The runtime first attempts to find an <i>exception frame</i> corresponding to - the function where the exception was thrown. If the programming language - supports exception handling (e.g. C++), the exception frame contains a - reference to an exception table describing how to process the exception. If - the language does not support exception handling (e.g. C), or if the - exception needs to be forwarded to a prior activation, the exception frame - contains information about how to unwind the current activation and restore - the state of the prior activation. This process is repeated until the - exception is handled. If the exception is not handled and no activations - remain, then the application is terminated with an appropriate error - message.</p> - -<p>Because different programming languages have different behaviors when - handling exceptions, the exception handling ABI provides a mechanism for - supplying <i>personalities</i>. 
An exception handling personality is defined - by way of a <i>personality function</i> (e.g. <tt>__gxx_personality_v0</tt> - in C++), which receives the context of the exception, an <i>exception - structure</i> containing the exception object type and value, and a reference - to the exception table for the current function. The personality function - for the current compile unit is specified in a <i>common exception - frame</i>.</p> - -<p>The organization of an exception table is language dependent. For C++, an - exception table is organized as a series of code ranges defining what to do - if an exception occurs in that range. Typically, the information associated - with a range defines which types of exception objects (using C++ <i>type - info</i>) that are handled in that range, and an associated action that - should take place. Actions typically pass control to a <i>landing - pad</i>.</p> - -<p>A landing pad corresponds roughly to the code found in the <tt>catch</tt> - portion of a <tt>try</tt>/<tt>catch</tt> sequence. When execution resumes at - a landing pad, it receives an <i>exception structure</i> and a - <i>selector value</i> corresponding to the <i>type</i> of exception - thrown. The selector is then used to determine which <i>catch</i> should - actually process the exception.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h2> - <a name="codegen">LLVM Code Generation</a> -</h2> - -<div> - -<p>From a C++ developer's perspective, exceptions are defined in terms of the - <tt>throw</tt> and <tt>try</tt>/<tt>catch</tt> statements. 
In this section - we will describe the implementation of LLVM exception handling in terms of - C++ examples.</p> - -<!-- ======================================================================= --> -<h3> - <a name="throw">Throw</a> -</h3> - -<div> - -<p>Languages that support exception handling typically provide a <tt>throw</tt> - operation to initiate the exception process. Internally, a <tt>throw</tt> - operation breaks down into two steps.</p> - -<ol> - <li>A request is made to allocate exception space for an exception structure. - This structure needs to survive beyond the current activation. This - structure will contain the type and value of the object being thrown.</li> - - <li>A call is made to the runtime to raise the exception, passing the - exception structure as an argument.</li> -</ol> - -<p>In C++, the allocation of the exception structure is done by the - <tt>__cxa_allocate_exception</tt> runtime function. The exception raising is - handled by <tt>__cxa_throw</tt>. The type of the exception is represented - using a C++ RTTI structure.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="try_catch">Try/Catch</a> -</h3> - -<div> - -<p>A call within the scope of a <i>try</i> statement can potentially raise an - exception. In those circumstances, the LLVM C++ front-end replaces the call - with an <tt>invoke</tt> instruction. Unlike a call, the <tt>invoke</tt> has - two potential continuation points:</p> - -<ol> - <li>where to continue when the call succeeds as per normal, and</li> - - <li>where to continue if the call raises an exception, either by a throw or - the unwinding of a throw</li> -</ol> - -<p>The term used to define a the place where an <tt>invoke</tt> continues after - an exception is called a <i>landing pad</i>. LLVM landing pads are - conceptually alternative function entry points where an exception structure - reference and a type info index are passed in as arguments. 
The landing pad - saves the exception structure reference and then proceeds to select the catch - block that corresponds to the type info of the exception object.</p> - -<p>The LLVM <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> - instruction</a> is used to convey information about the landing pad to the - back end. For C++, the <tt>landingpad</tt> instruction returns a pointer and - integer pair corresponding to the pointer to the <i>exception structure</i> - and the <i>selector value</i> respectively.</p> - -<p>The <tt>landingpad</tt> instruction takes a reference to the personality - function to be used for this <tt>try</tt>/<tt>catch</tt> sequence. The - remainder of the instruction is a list of <i>cleanup</i>, <i>catch</i>, - and <i>filter</i> clauses. The exception is tested against the clauses - sequentially from first to last. The selector value is a positive number if - the exception matched a type info, a negative number if it matched a filter, - and zero if it matched a cleanup. If nothing is matched, the behavior of - the program is <a href="#restrictions">undefined</a>. If a type info matched, - then the selector value is the index of the type info in the exception table, - which can be obtained using the - <a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic.</p> - -<p>Once the landing pad has the type info selector, the code branches to the - code for the first catch. The catch then checks the value of the type info - selector against the index of type info for that catch. Since the type info - index is not known until all the type infos have been gathered in the - backend, the catch code must call the - <a href="#llvm_eh_typeid_for"><tt>llvm.eh.typeid.for</tt></a> intrinsic to - determine the index for a given type info. 
If the catch fails to match the - selector then control is passed on to the next catch.</p> - -<p>Finally, the entry and exit of catch code is bracketed with calls to - <tt>__cxa_begin_catch</tt> and <tt>__cxa_end_catch</tt>.</p> - -<ul> - <li><tt>__cxa_begin_catch</tt> takes an exception structure reference as an - argument and returns the value of the exception object.</li> - - <li><tt>__cxa_end_catch</tt> takes no arguments. This function:<br><br> - <ol> - <li>Locates the most recently caught exception and decrements its handler - count,</li> - <li>Removes the exception from the <i>caught</i> stack if the handler - count goes to zero, and</li> - <li>Destroys the exception if the handler count goes to zero and the - exception was not re-thrown by throw.</li> - </ol> - <p><b>Note:</b> a rethrow from within the catch may replace this call with - a <tt>__cxa_rethrow</tt>.</p></li> -</ul> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="cleanups">Cleanups</a> -</h3> - -<div> - -<p>A cleanup is extra code which needs to be run as part of unwinding a scope. - C++ destructors are a typical example, but other languages and language - extensions provide a variety of different kinds of cleanups. In general, a - landing pad may need to run arbitrary amounts of cleanup code before actually - entering a catch block. To indicate the presence of cleanups, a - <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> instruction</a> - should have a <i>cleanup</i> clause. Otherwise, the unwinder will not stop at - the landing pad if there are no catches or filters that require it to.</p> - -<p><b>Note:</b> Do not allow a new exception to propagate out of the execution - of a cleanup. This can corrupt the internal state of the unwinder. 
- Different languages describe different high-level semantics for these - situations: for example, C++ requires that the process be terminated, whereas - Ada cancels both exceptions and throws a third.</p> - -<p>When all cleanups are finished, if the exception is not handled by the - current function, resume unwinding by calling the - <a href="LangRef.html#i_resume"><tt>resume</tt> instruction</a>, passing in - the result of the <tt>landingpad</tt> instruction for the original landing - pad.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="throw_filters">Throw Filters</a> -</h3> - -<div> - -<p>C++ allows the specification of which exception types may be thrown from a - function. To represent this, a top level landing pad may exist to filter out - invalid types. To express this in LLVM code the - <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> instruction</a> will - have a filter clause. The clause consists of an array of type infos. - <tt>landingpad</tt> will return a negative value if the exception does not - match any of the type infos. If no match is found then a call - to <tt>__cxa_call_unexpected</tt> should be made, otherwise - <tt>_Unwind_Resume</tt>. Each of these functions requires a reference to the - exception structure. Note that the most general form of a - <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> instruction</a> can - have any number of catch, cleanup, and filter clauses (though having more - than one cleanup is pointless). 
The LLVM C++ front-end can generate such - <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> instructions</a> due - to inlining creating nested exception handling scopes.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="restrictions">Restrictions</a> -</h3> - -<div> - -<p>The unwinder delegates the decision of whether to stop in a call frame to - that call frame's language-specific personality function. Not all unwinders - guarantee that they will stop to perform cleanups. For example, the GNU C++ - unwinder doesn't do so unless the exception is actually caught somewhere - further up the stack.</p> - -<p>In order for inlining to behave correctly, landing pads must be prepared to - handle selector results that they did not originally advertise. Suppose that - a function catches exceptions of type <tt>A</tt>, and it's inlined into a - function that catches exceptions of type <tt>B</tt>. The inliner will update - the <tt>landingpad</tt> instruction for the inlined landing pad to include - the fact that <tt>B</tt> is also caught. If that landing pad assumes that it - will only be entered to catch an <tt>A</tt>, it's in for a rude awakening. 
- Consequently, landing pads must test for the selector results they understand - and then resume exception propagation with the - <a href="LangRef.html#i_resume"><tt>resume</tt> instruction</a> if none of - the conditions match.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h2> - <a name="format_common_intrinsics">Exception Handling Intrinsics</a> -</h2> - -<div> - -<p>In addition to the - <a href="LangRef.html#i_landingpad"><tt>landingpad</tt></a> and - <a href="LangRef.html#i_resume"><tt>resume</tt></a> instructions, LLVM uses - several intrinsic functions (name prefixed with <i><tt>llvm.eh</tt></i>) to - provide exception handling information at various points in generated - code.</p> - -<!-- ======================================================================= --> -<h4> - <a name="llvm_eh_typeid_for">llvm.eh.typeid.for</a> -</h4> - -<div> - -<pre> - i32 @llvm.eh.typeid.for(i8* %type_info) -</pre> - -<p>This intrinsic returns the type info index in the exception table of the - current function. This value can be used to compare against the result - of <a href="LangRef.html#i_landingpad"><tt>landingpad</tt> instruction</a>. - The single argument is a reference to a type info.</p> - -</div> - -<!-- ======================================================================= --> -<h4> - <a name="llvm_eh_sjlj_setjmp">llvm.eh.sjlj.setjmp</a> -</h4> - -<div> - -<pre> - i32 @llvm.eh.sjlj.setjmp(i8* %setjmp_buf) -</pre> - -<p>For SJLJ based exception handling, this intrinsic forces register saving for - the current function and stores the address of the following instruction for - use as a destination address - by <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a>. 
The - buffer format and the overall functioning of this intrinsic is compatible - with the GCC <tt>__builtin_setjmp</tt> implementation allowing code built - with the clang and GCC to interoperate.</p> - -<p>The single parameter is a pointer to a five word buffer in which the calling - context is saved. The front end places the frame pointer in the first word, - and the target implementation of this intrinsic should place the destination - address for a - <a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a> in the - second word. The following three words are available for use in a - target-specific manner.</p> - -</div> - -<!-- ======================================================================= --> -<h4> - <a name="llvm_eh_sjlj_longjmp">llvm.eh.sjlj.longjmp</a> -</h4> - -<div> - -<pre> - void @llvm.eh.sjlj.longjmp(i8* %setjmp_buf) -</pre> - -<p>For SJLJ based exception handling, the <tt>llvm.eh.sjlj.longjmp</tt> - intrinsic is used to implement <tt>__builtin_longjmp()</tt>. The single - parameter is a pointer to a buffer populated - by <a href="#llvm_eh_sjlj_setjmp"><tt>llvm.eh.sjlj.setjmp</tt></a>. The frame - pointer and stack pointer are restored from the buffer, then control is - transferred to the destination address.</p> - -</div> -<!-- ======================================================================= --> -<h4> - <a name="llvm_eh_sjlj_lsda">llvm.eh.sjlj.lsda</a> -</h4> - -<div> - -<pre> - i8* @llvm.eh.sjlj.lsda() -</pre> - -<p>For SJLJ based exception handling, the <tt>llvm.eh.sjlj.lsda</tt> intrinsic - returns the address of the Language Specific Data Area (LSDA) for the current - function. 
The SJLJ front-end code stores this address in the exception - handling function context for use by the runtime.</p> - -</div> - -<!-- ======================================================================= --> -<h4> - <a name="llvm_eh_sjlj_callsite">llvm.eh.sjlj.callsite</a> -</h4> - -<div> - -<pre> - void @llvm.eh.sjlj.callsite(i32 %call_site_num) -</pre> - -<p>For SJLJ based exception handling, the <tt>llvm.eh.sjlj.callsite</tt> - intrinsic identifies the callsite value associated with the - following <tt>invoke</tt> instruction. This is used to ensure that landing - pad entries in the LSDA are generated in matching order.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h2> - <a name="asm">Asm Table Formats</a> -</h2> - -<div> - -<p>There are two tables that are used by the exception handling runtime to - determine which actions should be taken when an exception is thrown.</p> - -<!-- ======================================================================= --> -<h3> - <a name="unwind_tables">Exception Handling Frame</a> -</h3> - -<div> - -<p>An exception handling frame <tt>eh_frame</tt> is very similar to the unwind - frame used by DWARF debug info. The frame contains all the information - necessary to tear down the current frame and restore the state of the prior - frame. There is an exception handling frame for each function in a compile - unit, plus a common exception handling frame that defines information common - to all functions in the unit.</p> - -<!-- Todo - Table details here. --> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="exception_tables">Exception Tables</a> -</h3> - -<div> - -<p>An exception table contains information about what actions to take when an - exception is thrown in a particular part of a function's code. 
There is one - exception table per function, except leaf functions and functions that have - calls only to non-throwing functions. They do not need an exception - table.</p> - -<!-- Todo - Table details here. --> - -</div> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst new file mode 100644 index 0000000000..190f18261d --- /dev/null +++ b/docs/ExceptionHandling.rst @@ -0,0 +1,367 @@ +.. _exception_handling: + +========================== +Exception Handling in LLVM +========================== + +.. contents:: + :local: + +Introduction +============ + +This document is the central repository for all information pertaining to +exception handling in LLVM. It describes the format that LLVM exception +handling information takes, which is useful for those interested in creating +front-ends or dealing directly with the information. Further, this document +provides specific examples of what exception handling information is used for in +C and C++. + +Itanium ABI Zero-cost Exception Handling +---------------------------------------- + +Exception handling for most programming languages is designed to recover from +conditions that rarely occur during general use of an application. To that end, +exception handling should not interfere with the main flow of an application's +algorithm by performing checkpointing tasks, such as saving the current pc or +register state. 
+
+The Itanium ABI Exception Handling Specification defines a methodology for
+providing outlying data in the form of exception tables without inlining
+speculative exception handling code in the flow of an application's main
+algorithm. Thus, the specification is said to add "zero-cost" to the normal
+execution of an application.
+
+A more complete description of the Itanium ABI exception handling runtime
+support can be found at `Itanium C++ ABI: Exception Handling
+<http://www.codesourcery.com/cxx-abi/abi-eh.html>`_. A description of the
+exception frame format can be found at `Exception Frames
+<http://refspecs.freestandards.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html>`_,
+with details of the DWARF 4 specification at `DWARF 4 Standard
+<http://dwarfstd.org/Dwarf4Std.php>`_. A description for the C++ exception
+table formats can be found at `Exception Handling Tables
+<http://www.codesourcery.com/cxx-abi/exceptions.pdf>`_.
+
+Setjmp/Longjmp Exception Handling
+---------------------------------
+
+Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics
+`llvm.eh.sjlj.setjmp`_ and `llvm.eh.sjlj.longjmp`_ to handle control flow for
+exception handling.
+
+For each function which does exception processing --- be it ``try``/``catch``
+blocks or cleanups --- that function registers itself on a global frame
+list. When exceptions are unwinding, the runtime uses this list to identify
+which functions need processing.
+
+Landing pad selection is encoded in the call site entry of the function
+context. The runtime returns to the function via `llvm.eh.sjlj.longjmp`_, where
+a switch table transfers control to the appropriate landing pad based on the
+index stored in the function context.
+
+In contrast to DWARF exception handling, which encodes exception regions and
+frame information in out-of-line tables, SJLJ exception handling builds and
+removes the unwind frame context at runtime. 
This results in faster exception +handling at the expense of slower execution when no exceptions are thrown. As +exceptions are, by their nature, intended for uncommon code paths, DWARF +exception handling is generally preferred to SJLJ. + +Overview +-------- + +When an exception is thrown in LLVM code, the runtime does its best to find a +handler suited to processing the circumstance. + +The runtime first attempts to find an *exception frame* corresponding to the +function where the exception was thrown. If the programming language supports +exception handling (e.g. C++), the exception frame contains a reference to an +exception table describing how to process the exception. If the language does +not support exception handling (e.g. C), or if the exception needs to be +forwarded to a prior activation, the exception frame contains information about +how to unwind the current activation and restore the state of the prior +activation. This process is repeated until the exception is handled. If the +exception is not handled and no activations remain, then the application is +terminated with an appropriate error message. + +Because different programming languages have different behaviors when handling +exceptions, the exception handling ABI provides a mechanism for +supplying *personalities*. An exception handling personality is defined by +way of a *personality function* (e.g. ``__gxx_personality_v0`` in C++), +which receives the context of the exception, an *exception structure* +containing the exception object type and value, and a reference to the exception +table for the current function. The personality function for the current +compile unit is specified in a *common exception frame*. + +The organization of an exception table is language dependent. For C++, an +exception table is organized as a series of code ranges defining what to do if +an exception occurs in that range. 
Typically, the information associated with a +range defines which types of exception objects (using C++ *type info*) that are +handled in that range, and an associated action that should take place. Actions +typically pass control to a *landing pad*. + +A landing pad corresponds roughly to the code found in the ``catch`` portion of +a ``try``/``catch`` sequence. When execution resumes at a landing pad, it +receives an *exception structure* and a *selector value* corresponding to the +*type* of exception thrown. The selector is then used to determine which *catch* +should actually process the exception. + +LLVM Code Generation +==================== + +From a C++ developer's perspective, exceptions are defined in terms of the +``throw`` and ``try``/``catch`` statements. In this section we will describe the +implementation of LLVM exception handling in terms of C++ examples. + +Throw +----- + +Languages that support exception handling typically provide a ``throw`` +operation to initiate the exception process. Internally, a ``throw`` operation +breaks down into two steps. + +#. A request is made to allocate exception space for an exception structure. + This structure needs to survive beyond the current activation. This structure + will contain the type and value of the object being thrown. + +#. A call is made to the runtime to raise the exception, passing the exception + structure as an argument. + +In C++, the allocation of the exception structure is done by the +``__cxa_allocate_exception`` runtime function. The exception raising is handled +by ``__cxa_throw``. The type of the exception is represented using a C++ RTTI +structure. + +Try/Catch +--------- + +A call within the scope of a *try* statement can potentially raise an +exception. In those circumstances, the LLVM C++ front-end replaces the call with +an ``invoke`` instruction. Unlike a call, the ``invoke`` has two potential +continuation points: + +#. 
where to continue when the call succeeds as per normal, and
+
+#. where to continue if the call raises an exception, either by a throw or the
+   unwinding of a throw
+
+The term used to define the place where an ``invoke`` continues after an
+exception is called a *landing pad*. LLVM landing pads are conceptually
+alternative function entry points where an exception structure reference and a
+type info index are passed in as arguments. The landing pad saves the exception
+structure reference and then proceeds to select the catch block that corresponds
+to the type info of the exception object.
+
+The LLVM `landingpad instruction <LangRef.html#i_landingpad>`_ is used to convey
+information about the landing pad to the back end. For C++, the ``landingpad``
+instruction returns a pointer and integer pair corresponding to the pointer to
+the *exception structure* and the *selector value* respectively.
+
+The ``landingpad`` instruction takes a reference to the personality function to
+be used for this ``try``/``catch`` sequence. The remainder of the instruction is
+a list of *cleanup*, *catch*, and *filter* clauses. The exception is tested
+against the clauses sequentially from first to last. The selector value is a
+positive number if the exception matched a type info, a negative number if it
+matched a filter, and zero if it matched a cleanup. If nothing is matched, the
+behavior of the program is `undefined`_. If a type info matched, then the
+selector value is the index of the type info in the exception table, which can
+be obtained using the `llvm.eh.typeid.for`_ intrinsic.
+
+Once the landing pad has the type info selector, the code branches to the code
+for the first catch. The catch then checks the value of the type info selector
+against the index of type info for that catch. 
Since the type info index is not +known until all the type infos have been gathered in the backend, the catch code +must call the `llvm.eh.typeid.for`_ intrinsic to determine the index for a given +type info. If the catch fails to match the selector then control is passed on to +the next catch. + +Finally, the entry and exit of catch code is bracketed with calls to +``__cxa_begin_catch`` and ``__cxa_end_catch``. + +* ``__cxa_begin_catch`` takes an exception structure reference as an argument + and returns the value of the exception object. + +* ``__cxa_end_catch`` takes no arguments. This function: + + #. Locates the most recently caught exception and decrements its handler + count, + + #. Removes the exception from the *caught* stack if the handler count goes to + zero, and + + #. Destroys the exception if the handler count goes to zero and the exception + was not re-thrown by throw. + + .. note:: + + a rethrow from within the catch may replace this call with a + ``__cxa_rethrow``. + +Cleanups +-------- + +A cleanup is extra code which needs to be run as part of unwinding a scope. C++ +destructors are a typical example, but other languages and language extensions +provide a variety of different kinds of cleanups. In general, a landing pad may +need to run arbitrary amounts of cleanup code before actually entering a catch +block. To indicate the presence of cleanups, a `landingpad +instruction <LangRef.html#i_landingpad>`_ should have a *cleanup* +clause. Otherwise, the unwinder will not stop at the landing pad if there are no +catches or filters that require it to. + +.. note:: + + Do not allow a new exception to propagate out of the execution of a + cleanup. This can corrupt the internal state of the unwinder. Different + languages describe different high-level semantics for these situations: for + example, C++ requires that the process be terminated, whereas Ada cancels both + exceptions and throws a third. 
+ +When all cleanups are finished, if the exception is not handled by the current +function, resume unwinding by calling the `resume +instruction <LangRef.html#i_resume>`_, passing in the result of the +``landingpad`` instruction for the original landing pad. + +Throw Filters +------------- + +C++ allows the specification of which exception types may be thrown from a +function. To represent this, a top level landing pad may exist to filter out +invalid types. To express this in LLVM code the `landingpad +instruction <LangRef.html#i_landingpad>`_ will have a filter clause. The clause +consists of an array of type infos. ``landingpad`` will return a negative value +if the exception does not match any of the type infos. If no match is found then +a call to ``__cxa_call_unexpected`` should be made, otherwise +``_Unwind_Resume``. Each of these functions requires a reference to the +exception structure. Note that the most general form of a ``landingpad`` +instruction can have any number of catch, cleanup, and filter clauses (though +having more than one cleanup is pointless). The LLVM C++ front-end can generate +such ``landingpad`` instructions due to inlining creating nested exception +handling scopes. + +.. _undefined: + +Restrictions +------------ + +The unwinder delegates the decision of whether to stop in a call frame to that +call frame's language-specific personality function. Not all unwinders guarantee +that they will stop to perform cleanups. For example, the GNU C++ unwinder +doesn't do so unless the exception is actually caught somewhere further up the +stack. + +In order for inlining to behave correctly, landing pads must be prepared to +handle selector results that they did not originally advertise. Suppose that a +function catches exceptions of type ``A``, and it's inlined into a function that +catches exceptions of type ``B``. 
The inliner will update the ``landingpad`` +instruction for the inlined landing pad to include the fact that ``B`` is also +caught. If that landing pad assumes that it will only be entered to catch an +``A``, it's in for a rude awakening. Consequently, landing pads must test for +the selector results they understand and then resume exception propagation with +the `resume instruction <LangRef.html#i_resume>`_ if none of the conditions +match. + +Exception Handling Intrinsics +============================= + +In addition to the ``landingpad`` and ``resume`` instructions, LLVM uses several +intrinsic functions (name prefixed with ``llvm.eh``) to provide exception +handling information at various points in generated code. + +.. _llvm.eh.typeid.for: + +llvm.eh.typeid.for +------------------ + +.. code-block:: llvm + + i32 @llvm.eh.typeid.for(i8* %type_info) + + +This intrinsic returns the type info index in the exception table of the current +function. This value can be used to compare against the result of +``landingpad`` instruction. The single argument is a reference to a type info. + +.. _llvm.eh.sjlj.setjmp: + +llvm.eh.sjlj.setjmp +------------------- + +.. code-block:: llvm + + i32 @llvm.eh.sjlj.setjmp(i8* %setjmp_buf) + +For SJLJ based exception handling, this intrinsic forces register saving for the +current function and stores the address of the following instruction for use as +a destination address by `llvm.eh.sjlj.longjmp`_. The buffer format and the +overall functioning of this intrinsic is compatible with the GCC +``__builtin_setjmp`` implementation allowing code built with the clang and GCC +to interoperate. + +The single parameter is a pointer to a five word buffer in which the calling +context is saved. The front end places the frame pointer in the first word, and +the target implementation of this intrinsic should place the destination address +for a `llvm.eh.sjlj.longjmp`_ in the second word. 
The following three words are +available for use in a target-specific manner. + +.. _llvm.eh.sjlj.longjmp: + +llvm.eh.sjlj.longjmp +-------------------- + +.. code-block:: llvm + + void @llvm.eh.sjlj.longjmp(i8* %setjmp_buf) + +For SJLJ based exception handling, the ``llvm.eh.sjlj.longjmp`` intrinsic is +used to implement ``__builtin_longjmp()``. The single parameter is a pointer to +a buffer populated by `llvm.eh.sjlj.setjmp`_. The frame pointer and stack +pointer are restored from the buffer, then control is transferred to the +destination address. + +llvm.eh.sjlj.lsda +----------------- + +.. code-block:: llvm + + i8* @llvm.eh.sjlj.lsda() + +For SJLJ based exception handling, the ``llvm.eh.sjlj.lsda`` intrinsic returns +the address of the Language Specific Data Area (LSDA) for the current +function. The SJLJ front-end code stores this address in the exception handling +function context for use by the runtime. + +llvm.eh.sjlj.callsite +--------------------- + +.. code-block:: llvm + + void @llvm.eh.sjlj.callsite(i32 %call_site_num) + +For SJLJ based exception handling, the ``llvm.eh.sjlj.callsite`` intrinsic +identifies the callsite value associated with the following ``invoke`` +instruction. This is used to ensure that landing pad entries in the LSDA are +generated in matching order. + +Asm Table Formats +================= + +There are two tables that are used by the exception handling runtime to +determine which actions should be taken when an exception is thrown. + +Exception Handling Frame +------------------------ + +An exception handling frame ``eh_frame`` is very similar to the unwind frame +used by DWARF debug info. The frame contains all the information necessary to +tear down the current frame and restore the state of the prior frame. There is +an exception handling frame for each function in a compile unit, plus a common +exception handling frame that defines information common to all functions in the +unit. 
+ +Exception Tables +---------------- + +An exception table contains information about what actions to take when an +exception is thrown in a particular part of a function's code. There is one +exception table per function, except leaf functions and functions that have +calls only to non-throwing functions. They do not need an exception table. diff --git a/docs/FAQ.html b/docs/FAQ.html deleted file mode 100644 index dfc72704e6..0000000000 --- a/docs/FAQ.html +++ /dev/null @@ -1,761 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM: Frequently Asked Questions</title> - <style type="text/css"> - @import url("_static/llvm.css"); - .question { font-weight: bold } - .answer { margin-left: 2em } - </style> -</head> -<body> - -<h1> - LLVM: Frequently Asked Questions -</h1> - -<ol> - <li><a href="#license">License</a> - <ol> - <li>Does the University of Illinois Open Source License really qualify as an - "open source" license?</li> - - <li>Can I modify LLVM source code and redistribute the modified source?</li> - - <li>Can I modify LLVM source code and redistribute binaries or other tools - based on it, without redistributing the source?</li> - </ol></li> - - <li><a href="#source">Source code</a> - <ol> - <li>In what language is LLVM written?</li> - - <li>How portable is the LLVM source code?</li> - </ol></li> - - <li><a href="#build">Build Problems</a> - <ol> - <li>When I run configure, it finds the wrong C compiler.</li> - - <li>The <tt>configure</tt> script finds the right C compiler, but it uses - the LLVM linker from a previous build. 
What do I do?</li> - - <li>When creating a dynamic library, I get a strange GLIBC error.</li> - - <li>I've updated my source tree from Subversion, and now my build is trying - to use a file/directory that doesn't exist.</li> - - <li>I've modified a Makefile in my source tree, but my build tree keeps - using the old version. What do I do?</li> - - <li>I've upgraded to a new version of LLVM, and I get strange build - errors.</li> - - <li>I've built LLVM and am testing it, but the tests freeze.</li> - - <li>Why do test results differ when I perform different types of - builds?</li> - - <li>Compiling LLVM with GCC 3.3.2 fails, what should I do?</li> - - <li>Compiling LLVM with GCC succeeds, but the resulting tools do not work, - what can be wrong?</li> - - <li>When I use the test suite, all of the C Backend tests fail. What is - wrong?</li> - - <li>After Subversion update, rebuilding gives the error "No rule to make - target".</li> - - </ol></li> - - <li><a href="#felangs">Source Languages</a> - <ol> - <li><a href="#langs">What source languages are supported?</a></li> - - <li><a href="#langirgen">I'd like to write a self-hosting LLVM compiler. How - should I interface with the LLVM middle-end optimizers and back-end code - generators?</a></li> - - <li><a href="#langhlsupp">What support is there for higher level source - language constructs for building a compiler?</a></li> - - <li><a href="GetElementPtr.html">I don't understand the GetElementPtr - instruction. 
Help!</a></li> - </ol> - - <li><a href="#cfe">Using the C and C++ Front Ends</a> - <ol> - <li><a href="#platformindependent">Can I compile C or C++ code to - platform-independent LLVM bitcode?</a></li> - </ol> - </li> - - <li><a href="#cfe_code">Questions about code generated by the demo page</a> - <ol> - <li><a href="#iosinit">What is this <tt>llvm.global_ctors</tt> and - <tt>_GLOBAL__I_a...</tt> stuff that happens when I - #include <iostream>?</a></li> - - <li><a href="#codedce">Where did all of my code go??</a></li> - - <li><a href="#undef">What is this "<tt>undef</tt>" thing that shows up in - my code?</a></li> - - <li><a href="#callconvwrong">Why does instcombine + simplifycfg turn - a call to a function with a mismatched calling convention into "unreachable"? - Why not make the verifier reject it?</a></li> - </ol> - </li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="http://llvm.org/">The LLVM Team</a></p> -</div> - - -<!-- *********************************************************************** --> -<h2> - <a name="license">License</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<div class="question"> -<p>Does the University of Illinois Open Source License really qualify as an - "open source" license?</p> -</div> - -<div class="answer"> -<p>Yes, the license - is <a href="http://www.opensource.org/licenses/UoI-NCSA.php">certified</a> by - the Open Source Initiative (OSI).</p> -</div> - -<div class="question"> -<p>Can I modify LLVM source code and redistribute the modified source?</p> -</div> - -<div class="answer"> -<p>Yes. 
The modified source distribution must retain the copyright notice and - follow the three bulletted conditions listed in - the <a href="http://llvm.org/svn/llvm-project/llvm/trunk/LICENSE.TXT">LLVM - license</a>.</p> -</div> - -<div class="question"> -<p>Can I modify LLVM source code and redistribute binaries or other tools based - on it, without redistributing the source?</p> -</div> - -<div class="answer"> -<p>Yes. This is why we distribute LLVM under a less restrictive license than - GPL, as explained in the first question above.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="source">Source Code</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<div class="question"> -<p>In what language is LLVM written?</p> -</div> - -<div class="answer"> -<p>All of the LLVM tools and libraries are written in C++ with extensive use of - the STL.</p> -</div> - -<div class="question"> -<p>How portable is the LLVM source code?</p> -</div> - -<div class="answer"> -<p>The LLVM source code should be portable to most modern UNIX-like operating -systems. Most of the code is written in standard C++ with operating system -services abstracted to a support library. The tools required to build and test -LLVM have been ported to a plethora of platforms.</p> - -<p>Some porting problems may exist in the following areas:</p> - -<ul> - <li>The autoconf/makefile build system relies heavily on UNIX shell tools, - like the Bourne Shell and sed. 
Porting to systems without these tools - (MacOS 9, Plan 9) Will require more effort.</li> -</ul> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="build">Build Problems</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<div class="question"> -<p>When I run configure, it finds the wrong C compiler.</p> -</div> - -<div class="answer"> -<p>The <tt>configure</tt> script attempts to locate first <tt>gcc</tt> and then - <tt>cc</tt>, unless it finds compiler paths set in <tt>CC</tt> - and <tt>CXX</tt> for the C and C++ compiler, respectively.</p> - -<p>If <tt>configure</tt> finds the wrong compiler, either adjust your - <tt>PATH</tt> environment variable or set <tt>CC</tt> and <tt>CXX</tt> - explicitly.</p> - -</div> - -<div class="question"> -<p>The <tt>configure</tt> script finds the right C compiler, but it uses the - LLVM tools from a previous build. What do I do?</p> -</div> - -<div class="answer"> -<p>The <tt>configure</tt> script uses the <tt>PATH</tt> to find executables, so - if it's grabbing the wrong linker/assembler/etc, there are two ways to fix - it:</p> - -<ol> - <li><p>Adjust your <tt>PATH</tt> environment variable so that the correct - program appears first in the <tt>PATH</tt>. This may work, but may not be - convenient when you want them <i>first</i> in your path for other - work.</p></li> - - <li><p>Run <tt>configure</tt> with an alternative <tt>PATH</tt> that is - correct. In a Bourne compatible shell, the syntax would be:</p> - -<pre class="doc_code"> -% PATH=[the path without the bad program] ./configure ... 
-</pre> - - <p>This is still somewhat inconvenient, but it allows <tt>configure</tt> - to do its work without having to adjust your <tt>PATH</tt> - permanently.</p></li> -</ol> -</div> - -<div class="question"> -<p>When creating a dynamic library, I get a strange GLIBC error.</p> -</div> - -<div class="answer"> -<p>Under some operating systems (i.e. Linux), libtool does not work correctly if - GCC was compiled with the --disable-shared option. To work around this, - install your own version of GCC that has shared libraries enabled by - default.</p> -</div> - -<div class="question"> -<p>I've updated my source tree from Subversion, and now my build is trying to - use a file/directory that doesn't exist.</p> -</div> - -<div class="answer"> -<p>You need to re-run configure in your object directory. When new Makefiles - are added to the source tree, they have to be copied over to the object tree - in order to be used by the build.</p> -</div> - -<div class="question"> -<p>I've modified a Makefile in my source tree, but my build tree keeps using the - old version. What do I do?</p> -</div> - -<div class="answer"> -<p>If the Makefile already exists in your object tree, you can just run the - following command in the top level directory of your object tree:</p> - -<pre class="doc_code"> -% ./config.status <relative path to Makefile> -</pre> - -<p>If the Makefile is new, you will have to modify the configure script to copy - it over.</p> -</div> - -<div class="question"> -<p>I've upgraded to a new version of LLVM, and I get strange build errors.</p> -</div> - -<div class="answer"> - -<p>Sometimes, changes to the LLVM source code alters how the build system works. - Changes in libtool, autoconf, or header file dependencies are especially - prone to this sort of problem.</p> - -<p>The best thing to try is to remove the old files and re-build. In most - cases, this takes care of the problem. 
To do this, just type <tt>make - clean</tt> and then <tt>make</tt> in the directory that fails to build.</p> -</div> - -<div class="question"> -<p>I've built LLVM and am testing it, but the tests freeze.</p> -</div> - -<div class="answer"> -<p>This is most likely occurring because you built a profile or release - (optimized) build of LLVM and have not specified the same information on the - <tt>gmake</tt> command line.</p> - -<p>For example, if you built LLVM with the command:</p> - -<pre class="doc_code"> -% gmake ENABLE_PROFILING=1 -</pre> - -<p>...then you must run the tests with the following commands:</p> - -<pre class="doc_code"> -% cd llvm/test -% gmake ENABLE_PROFILING=1 -</pre> -</div> - -<div class="question"> -<p>Why do test results differ when I perform different types of builds?</p> -</div> - -<div class="answer"> -<p>The LLVM test suite is dependent upon several features of the LLVM tools and - libraries.</p> - -<p>First, the debugging assertions in code are not enabled in optimized or - profiling builds. Hence, tests that used to fail may pass.</p> - -<p>Second, some tests may rely upon debugging options or behavior that is only - available in the debug build. These tests will fail in an optimized or - profile build.</p> -</div> - -<div class="question"> -<p>Compiling LLVM with GCC 3.3.2 fails, what should I do?</p> -</div> - -<div class="answer"> -<p>This is <a href="http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13392">a bug in - GCC</a>, and affects projects other than LLVM. Try upgrading or downgrading - your GCC.</p> -</div> - -<div class="question"> -<p>Compiling LLVM with GCC succeeds, but the resulting tools do not work, what - can be wrong?</p> -</div> - -<div class="answer"> -<p>Several versions of GCC have shown a weakness in miscompiling the LLVM - codebase. Please consult your compiler version (<tt>gcc --version</tt>) to - find out whether it is <a href="GettingStarted.html#brokengcc">broken</a>. 
- If so, your only option is to upgrade GCC to a known good version.</p> -</div> - -<div class="question"> -<p>After Subversion update, rebuilding gives the error "No rule to make - target".</p> -</div> - -<div class="answer"> -<p>If the error is of the form:</p> - -<pre class="doc_code"> -gmake[2]: *** No rule to make target `/path/to/somefile', needed by -`/path/to/another/file.d'.<br> -Stop. -</pre> - -<p>This may occur anytime files are moved within the Subversion repository or - removed entirely. In this case, the best solution is to erase all - <tt>.d</tt> files, which list dependencies for source files, and rebuild:</p> - -<pre class="doc_code"> -% cd $LLVM_OBJ_DIR -% rm -f `find . -name \*\.d` -% gmake -</pre> - -<p>In other cases, it may be necessary to run <tt>make clean</tt> before - rebuilding.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="felangs">Source Languages</a> -</h2> - -<div> - -<div class="question"> -<p><a name="langs">What source languages are supported?</a></p> -</div> - -<div class="answer"> -<p>LLVM currently has full support for C and C++ source languages. These are - available through both <a href="http://clang.llvm.org/">Clang</a> and - <a href="http://dragonegg.llvm.org/">DragonEgg</a>.</p> - -<p>The PyPy developers are working on integrating LLVM into the PyPy backend so - that PyPy language can translate to LLVM.</p> -</div> - -<div class="question"> -<p><a name="langirgen">I'd like to write a self-hosting LLVM compiler. How - should I interface with the LLVM middle-end optimizers and back-end code - generators?</a></p> -</div> - -<div class="answer"> -<p>Your compiler front-end will communicate with LLVM by creating a module in - the LLVM intermediate representation (IR) format. 
Assuming you want to write - your language's compiler in the language itself (rather than C++), there are - 3 major ways to tackle generating LLVM IR from a front-end:</p> - -<ul> - <li><strong>Call into the LLVM libraries code using your language's FFI - (foreign function interface).</strong> - - <ul> - <li><em>for:</em> best tracks changes to the LLVM IR, .ll syntax, and .bc - format</li> - - <li><em>for:</em> enables running LLVM optimization passes without a - emit/parse overhead</li> - - <li><em>for:</em> adapts well to a JIT context</li> - - <li><em>against:</em> lots of ugly glue code to write</li> - </ul></li> - - <li> <strong>Emit LLVM assembly from your compiler's native language.</strong> - <ul> - <li><em>for:</em> very straightforward to get started</li> - - <li><em>against:</em> the .ll parser is slower than the bitcode reader - when interfacing to the middle end</li> - - <li><em>against:</em> you'll have to re-engineer the LLVM IR object model - and asm writer in your language</li> - - <li><em>against:</em> it may be harder to track changes to the IR</li> - </ul></li> - - <li><strong>Emit LLVM bitcode from your compiler's native language.</strong> - - <ul> - <li><em>for:</em> can use the more-efficient bitcode reader when - interfacing to the middle end</li> - - <li><em>against:</em> you'll have to re-engineer the LLVM IR object - model and bitcode writer in your language</li> - - <li><em>against:</em> it may be harder to track changes to the IR</li> - </ul></li> -</ul> - -<p>If you go with the first option, the C bindings in include/llvm-c should help - a lot, since most languages have strong support for interfacing with C. The - most common hurdle with calling C from managed code is interfacing with the - garbage collector. 
The C interface was designed to require very little memory - management, and so is straightforward in this regard.</p> -</div> - -<div class="question"> -<p><a name="langhlsupp">What support is there for a higher level source language - constructs for building a compiler?</a></p> -</div> - -<div class="answer"> -<p>Currently, there isn't much. LLVM supports an intermediate representation - which is useful for code representation but will not support the high level - (abstract syntax tree) representation needed by most compilers. There are no - facilities for lexical nor semantic analysis.</p> -</div> - -<div class="question"> -<p><a name="getelementptr">I don't understand the GetElementPtr - instruction. Help!</a></p> -</div> - -<div class="answer"> -<p>See <a href="GetElementPtr.html">The Often Misunderstood GEP - Instruction</a>.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="cfe">Using the C and C++ Front Ends</a> -</h2> - -<div> - -<div class="question"> -<p><a name="platformindependent">Can I compile C or C++ code to - platform-independent LLVM bitcode?</a></p> -</div> - -<div class="answer"> -<p>No. C and C++ are inherently platform-dependent languages. The most obvious - example of this is the preprocessor. A very common way that C code is made - portable is by using the preprocessor to include platform-specific code. In - practice, information about other platforms is lost after preprocessing, so - the result is inherently dependent on the platform that the preprocessing was - targeting.</p> - -<p>Another example is <tt>sizeof</tt>. It's common for <tt>sizeof(long)</tt> to - vary between platforms. 
In most C front-ends, <tt>sizeof</tt> is expanded to - a constant immediately, thus hard-wiring a platform-specific detail.</p> - -<p>Also, since many platforms define their ABIs in terms of C, and since LLVM is - lower-level than C, front-ends currently must emit platform-specific IR in - order to have the result conform to the platform ABI.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="cfe_code">Questions about code generated by the demo page</a> -</h2> - -<div> - -<div class="question"> -<p><a name="iosinit">What is this <tt>llvm.global_ctors</tt> and - <tt>_GLOBAL__I_a...</tt> stuff that happens when I <tt>#include - <iostream></tt>?</a></p> -</div> - -<div class="answer"> -<p>If you <tt>#include</tt> the <tt><iostream></tt> header into a C++ - translation unit, the file will probably use - the <tt>std::cin</tt>/<tt>std::cout</tt>/... global objects. However, C++ - does not guarantee an order of initialization between static objects in - different translation units, so if a static ctor/dtor in your .cpp file - used <tt>std::cout</tt>, for example, the object would not necessarily be - automatically initialized before your use.</p> - -<p>To make <tt>std::cout</tt> and friends work correctly in these scenarios, the - STL that we use declares a static object that gets created in every - translation unit that includes <tt><iostream></tt>. This object has a - static constructor and destructor that initializes and destroys the global - iostream objects before they could possibly be used in the file. The code - that you see in the .ll file corresponds to the constructor and destructor - registration code. 
-</p> - -<p>If you would like to make it easier to <b>understand</b> the LLVM code - generated by the compiler in the demo page, consider using <tt>printf()</tt> - instead of <tt>iostream</tt>s to print values.</p> -</div> - -<!--=========================================================================--> - -<div class="question"> -<p><a name="codedce">Where did all of my code go??</a></p> -</div> - -<div class="answer"> -<p>If you are using the LLVM demo page, you may often wonder what happened to - all of the code that you typed in. Remember that the demo script is running - the code through the LLVM optimizers, so if your code doesn't actually do - anything useful, it might all be deleted.</p> - -<p>To prevent this, make sure that the code is actually needed. For example, if - you are computing some expression, return the value from the function instead - of leaving it in a local variable. If you really want to constrain the - optimizer, you can read from and assign to <tt>volatile</tt> global - variables.</p> -</div> - -<!--=========================================================================--> - -<div class="question"> -<p><a name="undef">What is this "<tt>undef</tt>" thing that shows up in my - code?</a></p> -</div> - -<div class="answer"> -<p><a href="LangRef.html#undef"><tt>undef</tt></a> is the LLVM way of - representing a value that is not defined. You can get these if you do not - initialize a variable before you use it. For example, the C function:</p> - -<pre class="doc_code"> -int X() { int i; return i; } -</pre> - -<p>Is compiled to "<tt>ret i32 undef</tt>" because "<tt>i</tt>" never has a - value specified for it.</p> -</div> - -<!--=========================================================================--> - -<div class="question"> -<p><a name="callconvwrong">Why does instcombine + simplifycfg turn - a call to a function with a mismatched calling convention into "unreachable"? 
- Why not make the verifier reject it?</a></p> -</div> - -<div class="answer"> -<p>This is a common problem run into by authors of front-ends that are using -custom calling conventions: you need to make sure to set the right calling -convention on both the function and on each call to the function. For example, -this code:</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} -define void @bar() { - call void @foo() - ret void -} -</pre> - -<p>Is optimized to:</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} -define void @bar() { - unreachable -} -</pre> - -<p>... with "opt -instcombine -simplifycfg". This often bites people because -"all their code disappears". Setting the calling convention on the caller and -callee is required for indirect calls to work, so people often ask why not make -the verifier reject this sort of thing.</p> - -<p>The answer is that this code has undefined behavior, but it is not illegal. -If we made it illegal, then every transformation that could potentially create -this would have to ensure that it doesn't, and there is valid code that can -create this sort of construct (in dead code). The sorts of things that can -cause this to happen are fairly contrived, but we still need to accept them. -Here's an example:</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} -define internal void @bar(void()* %FP, i1 %cond) { - br i1 %cond, label %T, label %F -T: - call void %FP() - ret void -F: - call fastcc void %FP() - ret void -} -define void @test() { - %X = or i1 false, false - call void @bar(void()* @foo, i1 %X) - ret void -} -</pre> - -<p>In this example, "test" always passes @foo/false into bar, which ensures that - it is dynamically called with the right calling conv (thus, the code is - perfectly well defined). 
If you run this through the inliner, you get this - (the explicit "or" is there so that the inliner doesn't dead code eliminate - a bunch of stuff): -</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} -define void @test() { - %X = or i1 false, false - br i1 %X, label %T.i, label %F.i -T.i: - call void @foo() - br label %bar.exit -F.i: - call fastcc void @foo() - br label %bar.exit -bar.exit: - ret void -} -</pre> - -<p>Here you can see that the inlining pass made an undefined call to @foo with - the wrong calling convention. We really don't want to make the inliner have - to know about this sort of thing, so it needs to be valid code. In this case, - dead code elimination can trivially remove the undefined code. However, if %X - was an input argument to @test, the inliner would produce this: -</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} - -define void @test(i1 %X) { - br i1 %X, label %T.i, label %F.i -T.i: - call void @foo() - br label %bar.exit -F.i: - call fastcc void @foo() - br label %bar.exit -bar.exit: - ret void -} -</pre> - -<p>The interesting thing about this is that %X <em>must</em> be false for the -code to be well-defined, but no amount of dead code elimination will be able to -delete the broken call as unreachable. 
However, since instcombine/simplifycfg -turns the undefined call into unreachable, we end up with a branch on a -condition that goes to unreachable: a branch to unreachable can never happen, so -"-inline -instcombine -simplifycfg" is able to produce:</p> - -<pre class="doc_code"> -define fastcc void @foo() { - ret void -} -define void @test(i1 %X) { -F.i: - call fastcc void @foo() - ret void -} -</pre> - -</div> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/FAQ.rst b/docs/FAQ.rst new file mode 100644 index 0000000000..b0e3ca0456 --- /dev/null +++ b/docs/FAQ.rst @@ -0,0 +1,464 @@ +.. _faq: + +================================ +Frequently Asked Questions (FAQ) +================================ + +.. contents:: + :local: + + +License +======= + +Does the University of Illinois Open Source License really qualify as an "open source" license? +----------------------------------------------------------------------------------------------- +Yes, the license is `certified +<http://www.opensource.org/licenses/UoI-NCSA.php>`_ by the Open Source +Initiative (OSI). + + +Can I modify LLVM source code and redistribute the modified source? +------------------------------------------------------------------- +Yes. The modified source distribution must retain the copyright notice and +follow the three bulletted conditions listed in the `LLVM license +<http://llvm.org/svn/llvm-project/llvm/trunk/LICENSE.TXT>`_. 
+ + +Can I modify the LLVM source code and redistribute binaries or other tools based on it, without redistributing the source? +-------------------------------------------------------------------------------------------------------------------------- +Yes. This is why we distribute LLVM under a less restrictive license than GPL, +as explained in the first question above. + + +Source Code +=========== + +In what language is LLVM written? +--------------------------------- +All of the LLVM tools and libraries are written in C++ with extensive use of +the STL. + + +How portable is the LLVM source code? +------------------------------------- +The LLVM source code should be portable to most modern Unix-like operating +systems. Most of the code is written in standard C++ with operating system +services abstracted to a support library. The tools required to build and +test LLVM have been ported to a plethora of platforms. + +Some porting problems may exist in the following areas: + +* The autoconf/makefile build system relies heavily on UNIX shell tools, + like the Bourne Shell and sed. Porting to systems without these tools + (MacOS 9, Plan 9) will require more effort. + + +Build Problems +============== + +When I run configure, it finds the wrong C compiler. +---------------------------------------------------- +The ``configure`` script attempts to locate first ``gcc`` and then ``cc``, +unless it finds compiler paths set in ``CC`` and ``CXX`` for the C and C++ +compiler, respectively. + +If ``configure`` finds the wrong compiler, either adjust your ``PATH`` +environment variable or set ``CC`` and ``CXX`` explicitly. + + +The ``configure`` script finds the right C compiler, but it uses the LLVM tools from a previous build. What do I do? 
+--------------------------------------------------------------------------------------------------------------------- +The ``configure`` script uses the ``PATH`` to find executables, so if it's +grabbing the wrong linker/assembler/etc, there are two ways to fix it: + +#. Adjust your ``PATH`` environment variable so that the correct program + appears first in the ``PATH``. This may work, but may not be convenient + when you want them *first* in your path for other work. + +#. Run ``configure`` with an alternative ``PATH`` that is correct. In a + Bourne compatible shell, the syntax would be: + +.. code-block:: bash + + % PATH=[the path without the bad program] ./configure ... + +This is still somewhat inconvenient, but it allows ``configure`` to do its +work without having to adjust your ``PATH`` permanently. + + +When creating a dynamic library, I get a strange GLIBC error. +------------------------------------------------------------- +Under some operating systems (i.e. Linux), libtool does not work correctly if +GCC was compiled with the ``--disable-shared option``. To work around this, +install your own version of GCC that has shared libraries enabled by default. + + +I've updated my source tree from Subversion, and now my build is trying to use a file/directory that doesn't exist. +------------------------------------------------------------------------------------------------------------------- +You need to re-run configure in your object directory. When new Makefiles +are added to the source tree, they have to be copied over to the object tree +in order to be used by the build. + + +I've modified a Makefile in my source tree, but my build tree keeps using the old version. What do I do? +--------------------------------------------------------------------------------------------------------- +If the Makefile already exists in your object tree, you can just run the +following command in the top level directory of your object tree: + +.. 
code-block:: bash + + % ./config.status <relative path to Makefile>; + +If the Makefile is new, you will have to modify the configure script to copy +it over. + + +I've upgraded to a new version of LLVM, and I get strange build errors. +----------------------------------------------------------------------- +Sometimes, changes to the LLVM source code alters how the build system works. +Changes in ``libtool``, ``autoconf``, or header file dependencies are +especially prone to this sort of problem. + +The best thing to try is to remove the old files and re-build. In most cases, +this takes care of the problem. To do this, just type ``make clean`` and then +``make`` in the directory that fails to build. + + +I've built LLVM and am testing it, but the tests freeze. +-------------------------------------------------------- +This is most likely occurring because you built a profile or release +(optimized) build of LLVM and have not specified the same information on the +``gmake`` command line. + +For example, if you built LLVM with the command: + +.. code-block:: bash + + % gmake ENABLE_PROFILING=1 + +...then you must run the tests with the following commands: + +.. code-block:: bash + + % cd llvm/test + % gmake ENABLE_PROFILING=1 + +Why do test results differ when I perform different types of builds? +-------------------------------------------------------------------- +The LLVM test suite is dependent upon several features of the LLVM tools and +libraries. + +First, the debugging assertions in code are not enabled in optimized or +profiling builds. Hence, tests that used to fail may pass. + +Second, some tests may rely upon debugging options or behavior that is only +available in the debug build. These tests will fail in an optimized or +profile build. + + +Compiling LLVM with GCC 3.3.2 fails, what should I do? 
+------------------------------------------------------ +This is `a bug in GCC <http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13392>`_, +and affects projects other than LLVM. Try upgrading or downgrading your GCC. + + +Compiling LLVM with GCC succeeds, but the resulting tools do not work, what can be wrong? +----------------------------------------------------------------------------------------- +Several versions of GCC have shown a weakness in miscompiling the LLVM +codebase. Please consult your compiler version (``gcc --version``) to find +out whether it is `broken <GettingStarted.html#brokengcc>`_. If so, your only +option is to upgrade GCC to a known good version. + + +After Subversion update, rebuilding gives the error "No rule to make target". +----------------------------------------------------------------------------- +If the error is of the form: + +.. code-block:: bash + + gmake[2]: *** No rule to make target `/path/to/somefile', + needed by `/path/to/another/file.d'. + Stop. + +This may occur anytime files are moved within the Subversion repository or +removed entirely. In this case, the best solution is to erase all ``.d`` +files, which list dependencies for source files, and rebuild: + +.. code-block:: bash + + % cd $LLVM_OBJ_DIR + % rm -f `find . -name \*\.d` + % gmake + +In other cases, it may be necessary to run ``make clean`` before rebuilding. + + +Source Languages +================ + +What source languages are supported? +------------------------------------ +LLVM currently has full support for C and C++ source languages. These are +available through both `Clang <http://clang.llvm.org/>`_ and `DragonEgg +<http://dragonegg.llvm.org/>`_. + +The PyPy developers are working on integrating LLVM into the PyPy backend so +that PyPy language can translate to LLVM. + + +I'd like to write a self-hosting LLVM compiler. How should I interface with the LLVM middle-end optimizers and back-end code generators? 
+---------------------------------------------------------------------------------------------------------------------------------------- +Your compiler front-end will communicate with LLVM by creating a module in the +LLVM intermediate representation (IR) format. Assuming you want to write your +language's compiler in the language itself (rather than C++), there are 3 +major ways to tackle generating LLVM IR from a front-end: + +1. **Call into the LLVM libraries code using your language's FFI (foreign + function interface).** + + * *for:* best tracks changes to the LLVM IR, .ll syntax, and .bc format + + * *for:* enables running LLVM optimization passes without a emit/parse + overhead + + * *for:* adapts well to a JIT context + + * *against:* lots of ugly glue code to write + +2. **Emit LLVM assembly from your compiler's native language.** + + * *for:* very straightforward to get started + + * *against:* the .ll parser is slower than the bitcode reader when + interfacing to the middle end + + * *against:* it may be harder to track changes to the IR + +3. **Emit LLVM bitcode from your compiler's native language.** + + * *for:* can use the more-efficient bitcode reader when interfacing to the + middle end + + * *against:* you'll have to re-engineer the LLVM IR object model and bitcode + writer in your language + + * *against:* it may be harder to track changes to the IR + +If you go with the first option, the C bindings in include/llvm-c should help +a lot, since most languages have strong support for interfacing with C. The +most common hurdle with calling C from managed code is interfacing with the +garbage collector. The C interface was designed to require very little memory +management, and so is straightforward in this regard. + +What support is there for a higher level source language constructs for building a compiler? +-------------------------------------------------------------------------------------------- +Currently, there isn't much. 
LLVM supports an intermediate representation +which is useful for code representation but will not support the high level +(abstract syntax tree) representation needed by most compilers. There are no +facilities for lexical nor semantic analysis. + + +I don't understand the ``GetElementPtr`` instruction. Help! +----------------------------------------------------------- +See `The Often Misunderstood GEP Instruction <GetElementPtr.html>`_. + + +Using the C and C++ Front Ends +============================== + +Can I compile C or C++ code to platform-independent LLVM bitcode? +----------------------------------------------------------------- +No. C and C++ are inherently platform-dependent languages. The most obvious +example of this is the preprocessor. A very common way that C code is made +portable is by using the preprocessor to include platform-specific code. In +practice, information about other platforms is lost after preprocessing, so +the result is inherently dependent on the platform that the preprocessing was +targeting. + +Another example is ``sizeof``. It's common for ``sizeof(long)`` to vary +between platforms. In most C front-ends, ``sizeof`` is expanded to a +constant immediately, thus hard-wiring a platform-specific detail. + +Also, since many platforms define their ABIs in terms of C, and since LLVM is +lower-level than C, front-ends currently must emit platform-specific IR in +order to have the result conform to the platform ABI. + + +Questions about code generated by the demo page +=============================================== + +What is this ``llvm.global_ctors`` and ``_GLOBAL__I_a...`` stuff that happens when I ``#include <iostream>``? +------------------------------------------------------------------------------------------------------------- +If you ``#include`` the ``<iostream>`` header into a C++ translation unit, +the file will probably use the ``std::cin``/``std::cout``/... global objects. 
+However, C++ does not guarantee an order of initialization between static +objects in different translation units, so if a static ctor/dtor in your .cpp +file used ``std::cout``, for example, the object would not necessarily be +automatically initialized before your use. + +To make ``std::cout`` and friends work correctly in these scenarios, the STL +that we use declares a static object that gets created in every translation +unit that includes ``<iostream>``. This object has a static constructor +and destructor that initializes and destroys the global iostream objects +before they could possibly be used in the file. The code that you see in the +``.ll`` file corresponds to the constructor and destructor registration code. + +If you would like to make it easier to *understand* the LLVM code generated +by the compiler in the demo page, consider using ``printf()`` instead of +``iostream``\s to print values. + + +Where did all of my code go?? +----------------------------- +If you are using the LLVM demo page, you may often wonder what happened to +all of the code that you typed in. Remember that the demo script is running +the code through the LLVM optimizers, so if your code doesn't actually do +anything useful, it might all be deleted. + +To prevent this, make sure that the code is actually needed. For example, if +you are computing some expression, return the value from the function instead +of leaving it in a local variable. If you really want to constrain the +optimizer, you can read from and assign to ``volatile`` global variables. + + +What is this "``undef``" thing that shows up in my code? +-------------------------------------------------------- +``undef`` is the LLVM way of representing a value that is not defined. You +can get these if you do not initialize a variable before you use it. For +example, the C function: + +.. 
code-block:: c + + int X() { int i; return i; } + +Is compiled to "``ret i32 undef``" because "``i``" never has a value specified +for it. + + +Why does instcombine + simplifycfg turn a call to a function with a mismatched calling convention into "unreachable"? Why not make the verifier reject it? +---------------------------------------------------------------------------------------------------------------------------------------------------------- +This is a common problem run into by authors of front-ends that are using +custom calling conventions: you need to make sure to set the right calling +convention on both the function and on each call to the function. For +example, this code: + +.. code-block:: llvm + + define fastcc void @foo() { + ret void + } + define void @bar() { + call void @foo() + ret void + } + +Is optimized to: + +.. code-block:: llvm + + define fastcc void @foo() { + ret void + } + define void @bar() { + unreachable + } + +... with "``opt -instcombine -simplifycfg``". This often bites people because +"all their code disappears". Setting the calling convention on the caller and +callee is required for indirect calls to work, so people often ask why not +make the verifier reject this sort of thing. + +The answer is that this code has undefined behavior, but it is not illegal. +If we made it illegal, then every transformation that could potentially create +this would have to ensure that it doesn't, and there is valid code that can +create this sort of construct (in dead code). The sorts of things that can +cause this to happen are fairly contrived, but we still need to accept them. +Here's an example: + +.. 
code-block:: llvm + + define fastcc void @foo() { + ret void + } + define internal void @bar(void()* %FP, i1 %cond) { + br i1 %cond, label %T, label %F + T: + call void %FP() + ret void + F: + call fastcc void %FP() + ret void + } + define void @test() { + %X = or i1 false, false + call void @bar(void()* @foo, i1 %X) + ret void + } + +In this example, "test" always passes ``@foo``/``false`` into ``bar``, which +ensures that it is dynamically called with the right calling conv (thus, the +code is perfectly well defined). If you run this through the inliner, you +get this (the explicit "or" is there so that the inliner doesn't dead code +eliminate a bunch of stuff): + +.. code-block:: llvm + + define fastcc void @foo() { + ret void + } + define void @test() { + %X = or i1 false, false + br i1 %X, label %T.i, label %F.i + T.i: + call void @foo() + br label %bar.exit + F.i: + call fastcc void @foo() + br label %bar.exit + bar.exit: + ret void + } + +Here you can see that the inlining pass made an undefined call to ``@foo`` +with the wrong calling convention. We really don't want to make the inliner +have to know about this sort of thing, so it needs to be valid code. In this +case, dead code elimination can trivially remove the undefined code. However, +if ``%X`` was an input argument to ``@test``, the inliner would produce this: + +.. code-block:: llvm + + define fastcc void @foo() { + ret void + } + + define void @test(i1 %X) { + br i1 %X, label %T.i, label %F.i + T.i: + call void @foo() + br label %bar.exit + F.i: + call fastcc void @foo() + br label %bar.exit + bar.exit: + ret void + } + +The interesting thing about this is that ``%X`` *must* be false for the +code to be well-defined, but no amount of dead code elimination will be able +to delete the broken call as unreachable. 
However, since +``instcombine``/``simplifycfg`` turns the undefined call into unreachable, we +end up with a branch on a condition that goes to unreachable: a branch to +unreachable can never happen, so "``-inline -instcombine -simplifycfg``" is +able to produce: + +.. code-block:: llvm + + define fastcc void @foo() { + ret void + } + define void @test(i1 %X) { + F.i: + call fastcc void @foo() + ret void + } diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html deleted file mode 100644 index bddb1d6ce3..0000000000 --- a/docs/GetElementPtr.html +++ /dev/null @@ -1,753 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>The Often Misunderstood GEP Instruction</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> - <style type="text/css"> - TABLE { text-align: left; border: 1px solid black; border-collapse: collapse; margin: 0 0 0 0; } - </style> -</head> -<body> - -<h1> - The Often Misunderstood GEP Instruction -</h1> - -<ol> - <li><a href="#intro">Introduction</a></li> - <li><a href="#addresses">Address Computation</a> - <ol> - <li><a href="#extra_index">Why is the extra 0 index required?</a></li> - <li><a href="#deref">What is dereferenced by GEP?</a></li> - <li><a href="#firstptr">Why can you index through the first pointer but not - subsequent ones?</a></li> - <li><a href="#lead0">Why don't GEP x,0,0,1 and GEP x,1 alias? </a></li> - <li><a href="#trail0">Why do GEP x,1,0,0 and GEP x,1 alias? </a></li> - <li><a href="#vectors">Can GEP index into vector elements?</a> - <li><a href="#addrspace">What effect do address spaces have on GEPs?</a> - <li><a href="#int">How is GEP different from ptrtoint, arithmetic, and inttoptr?</a></li> - <li><a href="#be">I'm writing a backend for a target which needs custom lowering for GEP. 
How do I do this?</a> - <li><a href="#vla">How does VLA addressing work with GEPs?</a> - </ol></li> - <li><a href="#rules">Rules</a> - <ol> - <li><a href="#bounds">What happens if an array index is out of bounds?</a> - <li><a href="#negative">Can array indices be negative?</a> - <li><a href="#compare">Can I compare two values computed with GEPs?</a> - <li><a href="#types">Can I do GEP with a different pointer type than the type of the underlying object?</a> - <li><a href="#null">Can I cast an object's address to integer and add it to null?</a> - <li><a href="#ptrdiff">Can I compute the distance between two objects, and add that value to one address to compute the other address?</a> - <li><a href="#tbaa">Can I do type-based alias analysis on LLVM IR?</a> - <li><a href="#overflow">What happens if a GEP computation overflows?</a> - <li><a href="#check">How can I tell if my front-end is following the rules?</a> - </ol></li> - <li><a href="#rationale">Rationale</a> - <ol> - <li><a href="#goals">Why is GEP designed this way?</a></li> - <li><a href="#i32">Why do struct member indices always use i32?</a></li> - <li><a href="#uglygep">What's an uglygep?</a> - </ol></li> - <li><a href="#summary">Summary</a></li> -</ol> - -<div class="doc_author"> - <p>Written by: <a href="mailto:rspencer@reidspencer.com">Reid Spencer</a>.</p> -</div> - - -<!-- *********************************************************************** --> -<h2><a name="intro">Introduction</a></h2> -<!-- *********************************************************************** --> - -<div> - <p>This document seeks to dispel the mystery and confusion surrounding LLVM's - <a href="LangRef.html#i_getelementptr">GetElementPtr</a> (GEP) instruction. - Questions about the wily GEP instruction are - probably the most frequently occurring questions once a developer gets down to - coding with LLVM. Here we lay out the sources of confusion and show that the - GEP instruction is really quite simple. 
- </p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="addresses">Address Computation</a></h2> -<!-- *********************************************************************** --> -<div> - <p>When people are first confronted with the GEP instruction, they tend to - relate it to known concepts from other programming paradigms, most notably C - array indexing and field selection. GEP closely resembles C array indexing - and field selection, however it's is a little different and this leads to - the following questions.</p> - -<!-- *********************************************************************** --> -<h3> - <a name="firstptr">What is the first index of the GEP instruction?</a> -</h3> -<div> - <p>Quick answer: The index stepping through the first operand.</p> - <p>The confusion with the first index usually arises from thinking about - the GetElementPtr instruction as if it was a C index operator. They aren't the - same. For example, when we write, in "C":</p> - -<div class="doc_code"> -<pre> -AType *Foo; -... -X = &Foo->F; -</pre> -</div> - - <p>it is natural to think that there is only one index, the selection of the - field <tt>F</tt>. However, in this example, <tt>Foo</tt> is a pointer. That - pointer must be indexed explicitly in LLVM. C, on the other hand, indices - through it transparently. To arrive at the same address location as the C - code, you would provide the GEP instruction with two index operands. The - first operand indexes through the pointer; the second operand indexes the - field <tt>F</tt> of the structure, just as if you wrote:</p> - -<div class="doc_code"> -<pre> -X = &Foo[0].F; -</pre> -</div> - - <p>Sometimes this question gets rephrased as:</p> - <blockquote><p><i>Why is it okay to index through the first pointer, but - subsequent pointers won't be dereferenced?</i></p></blockquote> - <p>The answer is simply because memory does not have to be accessed to - perform the computation. 
The first operand to the GEP instruction must be a - value of a pointer type. The value of the pointer is provided directly to - the GEP instruction as an operand without any need for accessing memory. It - must, therefore be indexed and requires an index operand. Consider this - example:</p> - -<div class="doc_code"> -<pre> -struct munger_struct { - int f1; - int f2; -}; -void munge(struct munger_struct *P) { - P[0].f1 = P[1].f1 + P[2].f2; -} -... -munger_struct Array[3]; -... -munge(Array); -</pre> -</div> - - <p>In this "C" example, the front end compiler (llvm-gcc) will generate three - GEP instructions for the three indices through "P" in the assignment - statement. The function argument <tt>P</tt> will be the first operand of each - of these GEP instructions. The second operand indexes through that pointer. - The third operand will be the field offset into the - <tt>struct munger_struct</tt> type, for either the <tt>f1</tt> or - <tt>f2</tt> field. So, in LLVM assembly the <tt>munge</tt> function looks - like:</p> - -<div class="doc_code"> -<pre> -void %munge(%struct.munger_struct* %P) { -entry: - %tmp = getelementptr %struct.munger_struct* %P, i32 1, i32 0 - %tmp = load i32* %tmp - %tmp6 = getelementptr %struct.munger_struct* %P, i32 2, i32 1 - %tmp7 = load i32* %tmp6 - %tmp8 = add i32 %tmp7, %tmp - %tmp9 = getelementptr %struct.munger_struct* %P, i32 0, i32 0 - store i32 %tmp8, i32* %tmp9 - ret void -} -</pre> -</div> - - <p>In each case the first operand is the pointer through which the GEP - instruction starts. The same is true whether the first operand is an - argument, allocated memory, or a global variable. </p> - <p>To make this clear, let's consider a more obtuse example:</p> - -<div class="doc_code"> -<pre> -%MyVar = uninitialized global i32 -... 
-%idx1 = getelementptr i32* %MyVar, i64 0 -%idx2 = getelementptr i32* %MyVar, i64 1 -%idx3 = getelementptr i32* %MyVar, i64 2 -</pre> -</div> - - <p>These GEP instructions are simply making address computations from the - base address of <tt>MyVar</tt>. They compute, as follows (using C syntax): - </p> - -<div class="doc_code"> -<pre> -idx1 = (char*) &MyVar + 0 -idx2 = (char*) &MyVar + 4 -idx3 = (char*) &MyVar + 8 -</pre> -</div> - - <p>Since the type <tt>i32</tt> is known to be four bytes long, the indices - 0, 1 and 2 translate into memory offsets of 0, 4, and 8, respectively. No - memory is accessed to make these computations because the address of - <tt>%MyVar</tt> is passed directly to the GEP instructions.</p> - <p>The obtuse part of this example is in the cases of <tt>%idx2</tt> and - <tt>%idx3</tt>. They result in the computation of addresses that point to - memory past the end of the <tt>%MyVar</tt> global, which is only one - <tt>i32</tt> long, not three <tt>i32</tt>s long. While this is legal in LLVM, - it is inadvisable because any load or store with the pointer that results - from these GEP instructions would produce undefined results.</p> -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="extra_index">Why is the extra 0 index required?</a> -</h3> -<!-- *********************************************************************** --> -<div> - <p>Quick answer: there are no superfluous indices.</p> - <p>This question arises most often when the GEP instruction is applied to a - global variable which is always a pointer type. For example, consider - this:</p> - -<div class="doc_code"> -<pre> -%MyStruct = uninitialized global { float*, i32 } -... -%idx = getelementptr { float*, i32 }* %MyStruct, i64 0, i32 1 -</pre> -</div> - - <p>The GEP above yields an <tt>i32*</tt> by indexing the <tt>i32</tt> typed - field of the structure <tt>%MyStruct</tt>. 
When people first look at it, they - wonder why the <tt>i64 0</tt> index is needed. However, a closer inspection - of how globals and GEPs work reveals the need. Becoming aware of the following - facts will dispel the confusion:</p> - <ol> - <li>The type of <tt>%MyStruct</tt> is <i>not</i> <tt>{ float*, i32 }</tt> - but rather <tt>{ float*, i32 }*</tt>. That is, <tt>%MyStruct</tt> is a - pointer to a structure containing a pointer to a <tt>float</tt> and an - <tt>i32</tt>.</li> - <li>Point #1 is evidenced by noticing the type of the first operand of - the GEP instruction (<tt>%MyStruct</tt>) which is - <tt>{ float*, i32 }*</tt>.</li> - <li>The first index, <tt>i64 0</tt> is required to step over the global - variable <tt>%MyStruct</tt>. Since the first argument to the GEP - instruction must always be a value of pointer type, the first index - steps through that pointer. A value of 0 means 0 elements offset from that - pointer.</li> - <li>The second index, <tt>i32 1</tt> selects the second field of the - structure (the <tt>i32</tt>). </li> - </ol> -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="deref">What is dereferenced by GEP?</a> -</h3> -<div> - <p>Quick answer: nothing.</p> - <p>The GetElementPtr instruction dereferences nothing. That is, it doesn't - access memory in any way. That's what the Load and Store instructions are for. - GEP is only involved in the computation of addresses. For example, consider - this:</p> - -<div class="doc_code"> -<pre> -%MyVar = uninitialized global { [40 x i32 ]* } -... -%idx = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0, i64 0, i64 17 -</pre> -</div> - - <p>In this example, we have a global variable, <tt>%MyVar</tt> that is a - pointer to a structure containing a pointer to an array of 40 ints. The - GEP instruction seems to be accessing the 18th integer of the structure's - array of ints. However, this is actually an illegal GEP instruction. 
It - won't compile. The reason is that the pointer in the structure <i>must</i> - be dereferenced in order to index into the array of 40 ints. Since the - GEP instruction never accesses memory, it is illegal.</p> - <p>In order to access the 18th integer in the array, you would need to do the - following:</p> - -<div class="doc_code"> -<pre> -%idx = getelementptr { [40 x i32]* }* %, i64 0, i32 0 -%arr = load [40 x i32]** %idx -%idx = getelementptr [40 x i32]* %arr, i64 0, i64 17 -</pre> -</div> - - <p>In this case, we have to load the pointer in the structure with a load - instruction before we can index into the array. If the example was changed - to:</p> - -<div class="doc_code"> -<pre> -%MyVar = uninitialized global { [40 x i32 ] } -... -%idx = getelementptr { [40 x i32] }*, i64 0, i32 0, i64 17 -</pre> -</div> - - <p>then everything works fine. In this case, the structure does not contain a - pointer and the GEP instruction can index through the global variable, - into the first field of the structure and access the 18th <tt>i32</tt> in the - array there.</p> -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="lead0">Why don't GEP x,0,0,1 and GEP x,1 alias?</a> -</h3> -<div> - <p>Quick Answer: They compute different address locations.</p> - <p>If you look at the first indices in these GEP - instructions you find that they are different (0 and 1), therefore the address - computation diverges with that index. Consider this example:</p> - -<div class="doc_code"> -<pre> -%MyVar = global { [10 x i32 ] } -%idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 0, i32 0, i64 1 -%idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1 -</pre> -</div> - - <p>In this example, <tt>idx1</tt> computes the address of the second integer - in the array that is in the structure in <tt>%MyVar</tt>, that is - <tt>MyVar+4</tt>. The type of <tt>idx1</tt> is <tt>i32*</tt>. 
However, - <tt>idx2</tt> computes the address of <i>the next</i> structure after - <tt>%MyVar</tt>. The type of <tt>idx2</tt> is <tt>{ [10 x i32] }*</tt> and its - value is equivalent to <tt>MyVar + 40</tt> because it indexes past the ten - 4-byte integers in <tt>MyVar</tt>. Obviously, in such a situation, the - pointers don't alias.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="trail0">Why do GEP x,1,0,0 and GEP x,1 alias?</a> -</h3> -<div> - <p>Quick Answer: They compute the same address location.</p> - <p>These two GEP instructions will compute the same address because indexing - through the 0th element does not change the address. However, it does change - the type. Consider this example:</p> - -<div class="doc_code"> -<pre> -%MyVar = global { [10 x i32 ] } -%idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 1, i32 0, i64 0 -%idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1 -</pre> -</div> - - <p>In this example, the value of <tt>%idx1</tt> is <tt>%MyVar+40</tt> and - its type is <tt>i32*</tt>. The value of <tt>%idx2</tt> is also - <tt>MyVar+40</tt> but its type is <tt>{ [10 x i32] }*</tt>.</p> -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="vectors">Can GEP index into vector elements?</a> -</h3> -<div> - <p>This hasn't always been forcefully disallowed, though it's not recommended. - It leads to awkward special cases in the optimizers, and fundamental - inconsistency in the IR. 
In the future, it will probably be outright - disallowed.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="addrspace">What effect do address spaces have on GEPs?</a> -</h3> -<div> - <p>None, except that the address space qualifier on the first operand pointer - type always matches the address space qualifier on the result type.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="int"> - How is GEP different from ptrtoint, arithmetic, and inttoptr? - </a> -</h3> -<div> - <p>It's very similar; there are only subtle differences.</p> - - <p>With ptrtoint, you have to pick an integer type. One approach is to pick i64; - this is safe on everything LLVM supports (LLVM internally assumes pointers - are never wider than 64 bits in many places), and the optimizer will actually - narrow the i64 arithmetic down to the actual pointer size on targets which - don't support 64-bit arithmetic in most cases. However, there are some cases - where it doesn't do this. With GEP you can avoid this problem. - - <p>Also, GEP carries additional pointer aliasing rules. It's invalid to take a - GEP from one object, address into a different separately allocated - object, and dereference it. IR producers (front-ends) must follow this rule, - and consumers (optimizers, specifically alias analysis) benefit from being - able to rely on it. See the <a href="#rules">Rules</a> section for more - information.</p> - - <p>And, GEP is more concise in common cases.</p> - - <p>However, for the underlying integer computation implied, there - is no difference.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="be"> - I'm writing a backend for a target which needs custom lowering for GEP. - How do I do this? - </a> -</h3> -<div> - <p>You don't. The integer computation implied by a GEP is target-independent. 
- Typically what you'll need to do is make your backend pattern-match - expressions trees involving ADD, MUL, etc., which are what GEP is lowered - into. This has the advantage of letting your code work correctly in more - cases.</p> - - <p>GEP does use target-dependent parameters for the size and layout of data - types, which targets can customize.</p> - - <p>If you require support for addressing units which are not 8 bits, you'll - need to fix a lot of code in the backend, with GEP lowering being only a - small piece of the overall picture.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="vla">How does VLA addressing work with GEPs?</a> -</h3> -<div> - <p>GEPs don't natively support VLAs. LLVM's type system is entirely static, - and GEP address computations are guided by an LLVM type.</p> - - <p>VLA indices can be implemented as linearized indices. For example, an - expression like X[a][b][c], must be effectively lowered into a form - like X[a*m+b*n+c], so that it appears to the GEP as a single-dimensional - array reference.</p> - - <p>This means if you want to write an analysis which understands array - indices and you want to support VLAs, your code will have to be - prepared to reverse-engineer the linearization. 
One way to solve this - problem is to use the ScalarEvolution library, which always presents - VLA and non-VLA indexing in the same manner.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="rules">Rules</a></h2> -<!-- *********************************************************************** --> -<div> -<!-- *********************************************************************** --> - -<h3> - <a name="bounds">What happens if an array index is out of bounds?</a> -</h3> -<div> - <p>There are two senses in which an array index can be out of bounds.</p> - - <p>First, there's the array type which comes from the (static) type of - the first operand to the GEP. Indices greater than the number of elements - in the corresponding static array type are valid. There is no problem with - out of bounds indices in this sense. Indexing into an array only depends - on the size of the array element, not the number of elements.</p> - - <p>A common example of how this is used is arrays where the size is not known. - It's common to use array types with zero length to represent these. The - fact that the static type says there are zero elements is irrelevant; it's - perfectly valid to compute arbitrary element indices, as the computation - only depends on the size of the array element, not the number of - elements. Note that zero-sized arrays are not a special case here.</p> - - <p>This sense is unconnected with <tt>inbounds</tt> keyword. The - <tt>inbounds</tt> keyword is designed to describe low-level pointer - arithmetic overflow conditions, rather than high-level array - indexing rules. 
- - <p>Analysis passes which wish to understand array indexing should not - assume that the static array type bounds are respected.</p> - - <p>The second sense of being out of bounds is computing an address that's - beyond the actual underlying allocated object.</p> - - <p>With the <tt>inbounds</tt> keyword, the result value of the GEP is - undefined if the address is outside the actual underlying allocated - object and not the address one-past-the-end.</p> - - <p>Without the <tt>inbounds</tt> keyword, there are no restrictions - on computing out-of-bounds addresses. Obviously, performing a load or - a store requires an address of allocated and sufficiently aligned - memory. But the GEP itself is only concerned with computing addresses.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="negative">Can array indices be negative?</a> -</h3> -<div> - <p>Yes. This is basically a special case of array indices being out - of bounds.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="compare">Can I compare two values computed with GEPs?</a> -</h3> -<div> - <p>Yes. If both addresses are within the same allocated object, or - one-past-the-end, you'll get the comparison result you expect. If either - is outside of it, integer arithmetic wrapping may occur, so the - comparison may not be meaningful.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="types"> - Can I do GEP with a different pointer type than the type of - the underlying object? - </a> -</h3> -<div> - <p>Yes. There are no restrictions on bitcasting a pointer value to an arbitrary - pointer type. The types in a GEP serve only to define the parameters for the - underlying integer computation. 
They need not correspond with the actual - type of the underlying object.</p> - - <p>Furthermore, loads and stores don't have to use the same types as the type - of the underlying object. Types in this context serve only to specify - memory size and alignment. Beyond that there are merely a hint to the - optimizer indicating how the value will likely be used.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="null"> - Can I cast an object's address to integer and add it to null? - </a> -</h3> -<div> - <p>You can compute an address that way, but if you use GEP to do the add, - you can't use that pointer to actually access the object, unless the - object is managed outside of LLVM.</p> - - <p>The underlying integer computation is sufficiently defined; null has a - defined value -- zero -- and you can add whatever value you want to it.</p> - - <p>However, it's invalid to access (load from or store to) an LLVM-aware - object with such a pointer. This includes GlobalVariables, Allocas, and - objects pointed to by noalias pointers.</p> - - <p>If you really need this functionality, you can do the arithmetic with - explicit integer instructions, and use inttoptr to convert the result to - an address. Most of GEP's special aliasing rules do not apply to pointers - computed from ptrtoint, arithmetic, and inttoptr sequences.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="ptrdiff"> - Can I compute the distance between two objects, and add - that value to one address to compute the other address? 
- </a> -</h3> -<div> - <p>As with arithmetic on null, You can use GEP to compute an address that - way, but you can't use that pointer to actually access the object if you - do, unless the object is managed outside of LLVM.</p> - - <p>Also as above, ptrtoint and inttoptr provide an alternative way to do this - which do not have this restriction.</p> - -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="tbaa">Can I do type-based alias analysis on LLVM IR?</a> -</h3> -<div> - <p>You can't do type-based alias analysis using LLVM's built-in type system, - because LLVM has no restrictions on mixing types in addressing, loads or - stores.</p> - - <p>LLVM's type-based alias analysis pass uses metadata to describe a different - type system (such as the C type system), and performs type-based aliasing - on top of that. Further details are in the - <a href="LangRef.html#tbaa">language reference</a>.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="overflow">What happens if a GEP computation overflows?</a> -</h3> -<div> - <p>If the GEP lacks the <tt>inbounds</tt> keyword, the value is the result - from evaluating the implied two's complement integer computation. However, - since there's no guarantee of where an object will be allocated in the - address space, such values have limited meaning.</p> - - <p>If the GEP has the <tt>inbounds</tt> keyword, the result value is - undefined (a "<a href="LangRef.html#trapvalues">trap value</a>") if the GEP - overflows (i.e. wraps around the end of the address space).</p> - - <p>As such, there are some ramifications of this for inbounds GEPs: scales - implied by array/vector/pointer indices are always known to be "nsw" since - they are signed values that are scaled by the element size. These values - are also allowed to be negative (e.g. 
"gep i32 *%P, i32 -1") but the - pointer itself is logically treated as an unsigned value. This means that - GEPs have an asymmetric relation between the pointer base (which is treated - as unsigned) and the offset applied to it (which is treated as signed). The - result of the additions within the offset calculation cannot have signed - overflow, but when applied to the base pointer, there can be signed - overflow. - </p> - - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="check"> - How can I tell if my front-end is following the rules? - </a> -</h3> -<div> - <p>There is currently no checker for the getelementptr rules. Currently, - the only way to do this is to manually check each place in your front-end - where GetElementPtr operators are created.</p> - - <p>It's not possible to write a checker which could find all rule - violations statically. It would be possible to write a checker which - works by instrumenting the code with dynamic checks though. Alternatively, - it would be possible to write a static checker which catches a subset of - possible problems. However, no such checker exists today.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="rationale">Rationale</a></h2> -<!-- *********************************************************************** --> -<div> -<!-- *********************************************************************** --> - -<h3> - <a name="goals">Why is GEP designed this way?</a> -</h3> -<div> - <p>The design of GEP has the following goals, in rough unofficial - order of priority:</p> - <ul> - <li>Support C, C-like languages, and languages which can be - conceptually lowered into C (this covers a lot).</li> - <li>Support optimizations such as those that are common in - C compilers. 
In particular, GEP is a cornerstone of LLVM's - <a href="LangRef.html#pointeraliasing">pointer aliasing model</a>.</li> - <li>Provide a consistent method for computing addresses so that - address computations don't need to be a part of load and - store instructions in the IR.</li> - <li>Support non-C-like languages, to the extent that it doesn't - interfere with other goals.</li> - <li>Minimize target-specific information in the IR.</li> - </ul> -</div> - -<!-- *********************************************************************** --> -<h3> - <a name="i32">Why do struct member indices always use i32?</a> -</h3> -<div> - <p>The specific type i32 is probably just a historical artifact, however it's - wide enough for all practical purposes, so there's been no need to change it. - It doesn't necessarily imply i32 address arithmetic; it's just an identifier - which identifies a field in a struct. Requiring that all struct indices be - the same reduces the range of possibilities for cases where two GEPs are - effectively the same but have distinct operand types.</p> - -</div> - -<!-- *********************************************************************** --> - -<h3> - <a name="uglygep">What's an uglygep?</a> -</h3> -<div> - <p>Some LLVM optimizers operate on GEPs by internally lowering them into - more primitive integer expressions, which allows them to be combined - with other integer expressions and/or split into multiple separate - integer expressions. If they've made non-trivial changes, translating - back into LLVM IR can involve reverse-engineering the structure of - the addressing in order to fit it into the static type of the original - first operand. It isn't always possibly to fully reconstruct this - structure; sometimes the underlying addressing doesn't correspond with - the static type at all. In such cases the optimizer instead will emit - a GEP with the base pointer casted to a simple address-unit pointer, - using the name "uglygep". 
This isn't pretty, but it's just as - valid, and it's sufficient to preserve the pointer aliasing guarantees - that GEP provides.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="summary">Summary</a></h2> -<!-- *********************************************************************** --> - -<div> - <p>In summary, here's some things to always remember about the GetElementPtr - instruction:</p> - <ol> - <li>The GEP instruction never accesses memory, it only provides pointer - computations.</li> - <li>The first operand to the GEP instruction is always a pointer and it must - be indexed.</li> - <li>There are no superfluous indices for the GEP instruction.</li> - <li>Trailing zero indices are superfluous for pointer aliasing, but not for - the types of the pointers.</li> - <li>Leading zero indices are not superfluous for pointer aliasing nor the - types of the pointers.</li> - </ol> -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst new file mode 100644 index 0000000000..f6f904b2e3 --- /dev/null +++ b/docs/GetElementPtr.rst @@ -0,0 +1,538 @@ +.. _gep: + +======================================= +The Often Misunderstood GEP Instruction +======================================= + +.. contents:: + :local: + +Introduction +============ + +This document seeks to dispel the mystery and confusion surrounding LLVM's +`GetElementPtr <LangRef.html#i_getelementptr>`_ (GEP) instruction. 
Questions +about the wily GEP instruction are probably the most frequently occurring +questions once a developer gets down to coding with LLVM. Here we lay out the +sources of confusion and show that the GEP instruction is really quite simple. + +Address Computation +=================== + +When people are first confronted with the GEP instruction, they tend to relate +it to known concepts from other programming paradigms, most notably C array +indexing and field selection. GEP closely resembles C array indexing and field +selection, however it's is a little different and this leads to the following +questions. + +What is the first index of the GEP instruction? +----------------------------------------------- + +Quick answer: The index stepping through the first operand. + +The confusion with the first index usually arises from thinking about the +GetElementPtr instruction as if it was a C index operator. They aren't the +same. For example, when we write, in "C": + +.. code-block:: c++ + + AType *Foo; + ... + X = &Foo->F; + +it is natural to think that there is only one index, the selection of the field +``F``. However, in this example, ``Foo`` is a pointer. That pointer +must be indexed explicitly in LLVM. C, on the other hand, indices through it +transparently. To arrive at the same address location as the C code, you would +provide the GEP instruction with two index operands. The first operand indexes +through the pointer; the second operand indexes the field ``F`` of the +structure, just as if you wrote: + +.. code-block:: c++ + + X = &Foo[0].F; + +Sometimes this question gets rephrased as: + +.. _GEP index through first pointer: + + *Why is it okay to index through the first pointer, but subsequent pointers + won't be dereferenced?* + +The answer is simply because memory does not have to be accessed to perform the +computation. The first operand to the GEP instruction must be a value of a +pointer type. 
The value of the pointer is provided directly to the GEP +instruction as an operand without any need for accessing memory. It must, +therefore be indexed and requires an index operand. Consider this example: + +.. code-block:: c++ + + struct munger_struct { + int f1; + int f2; + }; + void munge(struct munger_struct *P) { + P[0].f1 = P[1].f1 + P[2].f2; + } + ... + munger_struct Array[3]; + ... + munge(Array); + +In this "C" example, the front end compiler (llvm-gcc) will generate three GEP +instructions for the three indices through "P" in the assignment statement. The +function argument ``P`` will be the first operand of each of these GEP +instructions. The second operand indexes through that pointer. The third +operand will be the field offset into the ``struct munger_struct`` type, for +either the ``f1`` or ``f2`` field. So, in LLVM assembly the ``munge`` function +looks like: + +.. code-block:: llvm + + void %munge(%struct.munger_struct* %P) { + entry: + %tmp = getelementptr %struct.munger_struct* %P, i32 1, i32 0 + %tmp = load i32* %tmp + %tmp6 = getelementptr %struct.munger_struct* %P, i32 2, i32 1 + %tmp7 = load i32* %tmp6 + %tmp8 = add i32 %tmp7, %tmp + %tmp9 = getelementptr %struct.munger_struct* %P, i32 0, i32 0 + store i32 %tmp8, i32* %tmp9 + ret void + } + +In each case the first operand is the pointer through which the GEP instruction +starts. The same is true whether the first operand is an argument, allocated +memory, or a global variable. + +To make this clear, let's consider a more obtuse example: + +.. code-block:: llvm + + %MyVar = uninitialized global i32 + ... + %idx1 = getelementptr i32* %MyVar, i64 0 + %idx2 = getelementptr i32* %MyVar, i64 1 + %idx3 = getelementptr i32* %MyVar, i64 2 + +These GEP instructions are simply making address computations from the base +address of ``MyVar``. They compute, as follows (using C syntax): + +.. 
code-block:: c++ + + idx1 = (char*) &MyVar + 0 + idx2 = (char*) &MyVar + 4 + idx3 = (char*) &MyVar + 8 + +Since the type ``i32`` is known to be four bytes long, the indices 0, 1 and 2 +translate into memory offsets of 0, 4, and 8, respectively. No memory is +accessed to make these computations because the address of ``%MyVar`` is passed +directly to the GEP instructions. + +The obtuse part of this example is in the cases of ``%idx2`` and ``%idx3``. They +result in the computation of addresses that point to memory past the end of the +``%MyVar`` global, which is only one ``i32`` long, not three ``i32``\s long. +While this is legal in LLVM, it is inadvisable because any load or store with +the pointer that results from these GEP instructions would produce undefined +results. + +Why is the extra 0 index required? +---------------------------------- + +Quick answer: there are no superfluous indices. + +This question arises most often when the GEP instruction is applied to a global +variable which is always a pointer type. For example, consider this: + +.. code-block:: llvm + + %MyStruct = uninitialized global { float*, i32 } + ... + %idx = getelementptr { float*, i32 }* %MyStruct, i64 0, i32 1 + +The GEP above yields an ``i32*`` by indexing the ``i32`` typed field of the +structure ``%MyStruct``. When people first look at it, they wonder why the ``i64 +0`` index is needed. However, a closer inspection of how globals and GEPs work +reveals the need. Becoming aware of the following facts will dispel the +confusion: + +#. The type of ``%MyStruct`` is *not* ``{ float*, i32 }`` but rather ``{ float*, + i32 }*``. That is, ``%MyStruct`` is a pointer to a structure containing a + pointer to a ``float`` and an ``i32``. + +#. Point #1 is evidenced by noticing the type of the first operand of the GEP + instruction (``%MyStruct``) which is ``{ float*, i32 }*``. + +#. The first index, ``i64 0`` is required to step over the global variable + ``%MyStruct``. 
Since the first argument to the GEP instruction must always + be a value of pointer type, the first index steps through that pointer. A + value of 0 means 0 elements offset from that pointer. + +#. The second index, ``i32 1`` selects the second field of the structure (the + ``i32``). + +What is dereferenced by GEP? +---------------------------- + +Quick answer: nothing. + +The GetElementPtr instruction dereferences nothing. That is, it doesn't access +memory in any way. That's what the Load and Store instructions are for. GEP is +only involved in the computation of addresses. For example, consider this: + +.. code-block:: llvm + + %MyVar = uninitialized global { [40 x i32 ]* } + ... + %idx = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0, i64 0, i64 17 + +In this example, we have a global variable, ``%MyVar`` that is a pointer to a +structure containing a pointer to an array of 40 ints. The GEP instruction seems +to be accessing the 18th integer of the structure's array of ints. However, this +is actually an illegal GEP instruction. It won't compile. The reason is that the +pointer in the structure <i>must</i> be dereferenced in order to index into the +array of 40 ints. Since the GEP instruction never accesses memory, it is +illegal. + +In order to access the 18th integer in the array, you would need to do the +following: + +.. code-block:: llvm + + %idx = getelementptr { [40 x i32]* }* %, i64 0, i32 0 + %arr = load [40 x i32]** %idx + %idx = getelementptr [40 x i32]* %arr, i64 0, i64 17 + +In this case, we have to load the pointer in the structure with a load +instruction before we can index into the array. If the example was changed to: + +.. code-block:: llvm + + %MyVar = uninitialized global { [40 x i32 ] } + ... + %idx = getelementptr { [40 x i32] }*, i64 0, i32 0, i64 17 + +then everything works fine. 
In this case, the structure does not contain a +pointer and the GEP instruction can index through the global variable, into the +first field of the structure and access the 18th ``i32`` in the array there. + +Why don't GEP x,0,0,1 and GEP x,1 alias? +---------------------------------------- + +Quick Answer: They compute different address locations. + +If you look at the first indices in these GEP instructions you find that they +are different (0 and 1), therefore the address computation diverges with that +index. Consider this example: + +.. code-block:: llvm + + %MyVar = global { [10 x i32 ] } + %idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 0, i32 0, i64 1 + %idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1 + +In this example, ``idx1`` computes the address of the second integer in the +array that is in the structure in ``%MyVar``, that is ``MyVar+4``. The type of +``idx1`` is ``i32*``. However, ``idx2`` computes the address of *the next* +structure after ``%MyVar``. The type of ``idx2`` is ``{ [10 x i32] }*`` and its +value is equivalent to ``MyVar + 40`` because it indexes past the ten 4-byte +integers in ``MyVar``. Obviously, in such a situation, the pointers don't +alias. + +Why do GEP x,1,0,0 and GEP x,1 alias? +------------------------------------- + +Quick Answer: They compute the same address location. + +These two GEP instructions will compute the same address because indexing +through the 0th element does not change the address. However, it does change the +type. Consider this example: + +.. code-block:: llvm + + %MyVar = global { [10 x i32 ] } + %idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 1, i32 0, i64 0 + %idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1 + +In this example, the value of ``%idx1`` is ``%MyVar+40`` and its type is +``i32*``. The value of ``%idx2`` is also ``MyVar+40`` but its type is ``{ [10 x +i32] }*``. + +Can GEP index into vector elements? 
+----------------------------------- + +This hasn't always been forcefully disallowed, though it's not recommended. It +leads to awkward special cases in the optimizers, and fundamental inconsistency +in the IR. In the future, it will probably be outright disallowed. + +What effect do address spaces have on GEPs? +------------------------------------------- + +None, except that the address space qualifier on the first operand pointer type +always matches the address space qualifier on the result type. + +How is GEP different from ``ptrtoint``, arithmetic, and ``inttoptr``? +--------------------------------------------------------------------- + +It's very similar; there are only subtle differences. + +With ptrtoint, you have to pick an integer type. One approach is to pick i64; +this is safe on everything LLVM supports (LLVM internally assumes pointers are +never wider than 64 bits in many places), and the optimizer will actually narrow +the i64 arithmetic down to the actual pointer size on targets which don't +support 64-bit arithmetic in most cases. However, there are some cases where it +doesn't do this. With GEP you can avoid this problem. + +Also, GEP carries additional pointer aliasing rules. It's invalid to take a GEP +from one object, address into a different separately allocated object, and +dereference it. IR producers (front-ends) must follow this rule, and consumers +(optimizers, specifically alias analysis) benefit from being able to rely on +it. See the `Rules`_ section for more information. + +And, GEP is more concise in common cases. + +However, for the underlying integer computation implied, there is no +difference. + + +I'm writing a backend for a target which needs custom lowering for GEP. How do I do this? +----------------------------------------------------------------------------------------- + +You don't. The integer computation implied by a GEP is target-independent. 
+Typically what you'll need to do is make your backend pattern-match expressions +trees involving ADD, MUL, etc., which are what GEP is lowered into. This has the +advantage of letting your code work correctly in more cases. + +GEP does use target-dependent parameters for the size and layout of data types, +which targets can customize. + +If you require support for addressing units which are not 8 bits, you'll need to +fix a lot of code in the backend, with GEP lowering being only a small piece of +the overall picture. + +How does VLA addressing work with GEPs? +--------------------------------------- + +GEPs don't natively support VLAs. LLVM's type system is entirely static, and GEP +address computations are guided by an LLVM type. + +VLA indices can be implemented as linearized indices. For example, an expression +like ``X[a][b][c]``, must be effectively lowered into a form like +``X[a*m+b*n+c]``, so that it appears to the GEP as a single-dimensional array +reference. + +This means if you want to write an analysis which understands array indices and +you want to support VLAs, your code will have to be prepared to reverse-engineer +the linearization. One way to solve this problem is to use the ScalarEvolution +library, which always presents VLA and non-VLA indexing in the same manner. + +.. _Rules: + +Rules +===== + +What happens if an array index is out of bounds? +------------------------------------------------ + +There are two senses in which an array index can be out of bounds. + +First, there's the array type which comes from the (static) type of the first +operand to the GEP. Indices greater than the number of elements in the +corresponding static array type are valid. There is no problem with out of +bounds indices in this sense. Indexing into an array only depends on the size of +the array element, not the number of elements. + +A common example of how this is used is arrays where the size is not known. 
+It's common to use array types with zero length to represent these. The fact +that the static type says there are zero elements is irrelevant; it's perfectly +valid to compute arbitrary element indices, as the computation only depends on +the size of the array element, not the number of elements. Note that zero-sized +arrays are not a special case here. + +This sense is unconnected with ``inbounds`` keyword. The ``inbounds`` keyword is +designed to describe low-level pointer arithmetic overflow conditions, rather +than high-level array indexing rules. + +Analysis passes which wish to understand array indexing should not assume that +the static array type bounds are respected. + +The second sense of being out of bounds is computing an address that's beyond +the actual underlying allocated object. + +With the ``inbounds`` keyword, the result value of the GEP is undefined if the +address is outside the actual underlying allocated object and not the address +one-past-the-end. + +Without the ``inbounds`` keyword, there are no restrictions on computing +out-of-bounds addresses. Obviously, performing a load or a store requires an +address of allocated and sufficiently aligned memory. But the GEP itself is only +concerned with computing addresses. + +Can array indices be negative? +------------------------------ + +Yes. This is basically a special case of array indices being out of bounds. + +Can I compare two values computed with GEPs? +-------------------------------------------- + +Yes. If both addresses are within the same allocated object, or +one-past-the-end, you'll get the comparison result you expect. If either is +outside of it, integer arithmetic wrapping may occur, so the comparison may not +be meaningful. + +Can I do GEP with a different pointer type than the type of the underlying object? +---------------------------------------------------------------------------------- + +Yes. 
There are no restrictions on bitcasting a pointer value to an arbitrary +pointer type. The types in a GEP serve only to define the parameters for the +underlying integer computation. They need not correspond with the actual type of +the underlying object. + +Furthermore, loads and stores don't have to use the same types as the type of +the underlying object. Types in this context serve only to specify memory size +and alignment. Beyond that there are merely a hint to the optimizer indicating +how the value will likely be used. + +Can I cast an object's address to integer and add it to null? +------------------------------------------------------------- + +You can compute an address that way, but if you use GEP to do the add, you can't +use that pointer to actually access the object, unless the object is managed +outside of LLVM. + +The underlying integer computation is sufficiently defined; null has a defined +value --- zero --- and you can add whatever value you want to it. + +However, it's invalid to access (load from or store to) an LLVM-aware object +with such a pointer. This includes ``GlobalVariables``, ``Allocas``, and objects +pointed to by noalias pointers. + +If you really need this functionality, you can do the arithmetic with explicit +integer instructions, and use inttoptr to convert the result to an address. Most +of GEP's special aliasing rules do not apply to pointers computed from ptrtoint, +arithmetic, and inttoptr sequences. + +Can I compute the distance between two objects, and add that value to one address to compute the other address? +--------------------------------------------------------------------------------------------------------------- + +As with arithmetic on null, You can use GEP to compute an address that way, but +you can't use that pointer to actually access the object if you do, unless the +object is managed outside of LLVM. 
+ +Also as above, ptrtoint and inttoptr provide an alternative way to do this which +do not have this restriction. + +Can I do type-based alias analysis on LLVM IR? +---------------------------------------------- + +You can't do type-based alias analysis using LLVM's built-in type system, +because LLVM has no restrictions on mixing types in addressing, loads or stores. + +LLVM's type-based alias analysis pass uses metadata to describe a different type +system (such as the C type system), and performs type-based aliasing on top of +that. Further details are in the `language reference <LangRef.html#tbaa>`_. + +What happens if a GEP computation overflows? +-------------------------------------------- + +If the GEP lacks the ``inbounds`` keyword, the value is the result from +evaluating the implied two's complement integer computation. However, since +there's no guarantee of where an object will be allocated in the address space, +such values have limited meaning. + +If the GEP has the ``inbounds`` keyword, the result value is undefined (a "trap +value") if the GEP overflows (i.e. wraps around the end of the address space). + +As such, there are some ramifications of this for inbounds GEPs: scales implied +by array/vector/pointer indices are always known to be "nsw" since they are +signed values that are scaled by the element size. These values are also +allowed to be negative (e.g. "``gep i32 *%P, i32 -1``") but the pointer itself +is logically treated as an unsigned value. This means that GEPs have an +asymmetric relation between the pointer base (which is treated as unsigned) and +the offset applied to it (which is treated as signed). The result of the +additions within the offset calculation cannot have signed overflow, but when +applied to the base pointer, there can be signed overflow. + +How can I tell if my front-end is following the rules? +------------------------------------------------------ + +There is currently no checker for the getelementptr rules. 
Currently, the only +way to do this is to manually check each place in your front-end where +GetElementPtr operators are created. + +It's not possible to write a checker which could find all rule violations +statically. It would be possible to write a checker which works by instrumenting +the code with dynamic checks though. Alternatively, it would be possible to +write a static checker which catches a subset of possible problems. However, no +such checker exists today. + +Rationale +========= + +Why is GEP designed this way? +----------------------------- + +The design of GEP has the following goals, in rough unofficial order of +priority: + +* Support C, C-like languages, and languages which can be conceptually lowered + into C (this covers a lot). + +* Support optimizations such as those that are common in C compilers. In + particular, GEP is a cornerstone of LLVM's `pointer aliasing + model <LangRef.html#pointeraliasing>`_. + +* Provide a consistent method for computing addresses so that address + computations don't need to be a part of load and store instructions in the IR. + +* Support non-C-like languages, to the extent that it doesn't interfere with + other goals. + +* Minimize target-specific information in the IR. + +Why do struct member indices always use ``i32``? +------------------------------------------------ + +The specific type i32 is probably just a historical artifact, however it's wide +enough for all practical purposes, so there's been no need to change it. It +doesn't necessarily imply i32 address arithmetic; it's just an identifier which +identifies a field in a struct. Requiring that all struct indices be the same +reduces the range of possibilities for cases where two GEPs are effectively the +same but have distinct operand types. + +What's an uglygep? 
+------------------ + +Some LLVM optimizers operate on GEPs by internally lowering them into more +primitive integer expressions, which allows them to be combined with other +integer expressions and/or split into multiple separate integer expressions. If +they've made non-trivial changes, translating back into LLVM IR can involve +reverse-engineering the structure of the addressing in order to fit it into the +static type of the original first operand. It isn't always possibly to fully +reconstruct this structure; sometimes the underlying addressing doesn't +correspond with the static type at all. In such cases the optimizer instead will +emit a GEP with the base pointer casted to a simple address-unit pointer, using +the name "uglygep". This isn't pretty, but it's just as valid, and it's +sufficient to preserve the pointer aliasing guarantees that GEP provides. + +Summary +======= + +In summary, here's some things to always remember about the GetElementPtr +instruction: + + +#. The GEP instruction never accesses memory, it only provides pointer + computations. + +#. The first operand to the GEP instruction is always a pointer and it must be + indexed. + +#. There are no superfluous indices for the GEP instruction. + +#. Trailing zero indices are superfluous for pointer aliasing, but not for the + types of the pointers. + +#. Leading zero indices are not superfluous for pointer aliasing nor the types + of the pointers. 
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html deleted file mode 100644 index b0ed82409d..0000000000 --- a/docs/GettingStartedVS.html +++ /dev/null @@ -1,368 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>Getting Started with LLVM System for Microsoft Visual Studio</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - Getting Started with the LLVM System using Microsoft Visual Studio -</h1> - -<ul> - <li><a href="#overview">Overview</a> - <li><a href="#requirements">Requirements</a> - <ol> - <li><a href="#hardware">Hardware</a> - <li><a href="#software">Software</a> - </ol></li> - <li><a href="#quickstart">Getting Started</a> - <li><a href="#tutorial">An Example Using the LLVM Tool Chain</a> - <li><a href="#problems">Common Problems</a> - <li><a href="#links">Links</a> -</ul> - -<div class="doc_author"> - <p>Written by: <a href="http://llvm.org/">The LLVM Team</a></p> -</div> - - -<!-- *********************************************************************** --> -<h2> - <a name="overview"><b>Overview</b></a> -</h2> -<!-- *********************************************************************** --> - -<div> - - <p>Welcome to LLVM on Windows! This document only covers LLVM on Windows using - Visual Studio, not mingw or cygwin. In order to get started, you first need to - know some basic information.</p> - - <p>There are many different projects that compose LLVM. The first is the LLVM - suite. This contains all of the tools, libraries, and header files needed to - use LLVM. It contains an assembler, disassembler, - bitcode analyzer and bitcode optimizer. It also contains a test suite that can - be used to test the LLVM tools.</p> - - <p>Another useful project on Windows is - <a href="http://clang.llvm.org/">clang</a>. 
Clang is a C family - ([Objective]C/C++) compiler. Clang mostly works on Windows, but does not - currently understand all of the Microsoft extensions to C and C++. Because of - this, clang cannot parse the C++ standard library included with Visual Studio, - nor parts of the Windows Platform SDK. However, most standard C programs do - compile. Clang can be used to emit bitcode, directly emit object files or - even linked executables using Visual Studio's <tt>link.exe</tt></p> - - <p>The large LLVM test suite cannot be run on the Visual Studio port at this - time.</p> - - <p>Most of the tools build and work. <tt>bugpoint</tt> does build, but does - not work.</p> - - <p>Additional information about the LLVM directory structure and tool chain - can be found on the main <a href="GettingStarted.html">Getting Started</a> - page.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="requirements"><b>Requirements</b></a> -</h2> -<!-- *********************************************************************** --> - -<div> - - <p>Before you begin to use the LLVM system, review the requirements given - below. This may save you some trouble by knowing ahead of time what hardware - and software you will need.</p> - -<!-- ======================================================================= --> -<h3> - <a name="hardware"><b>Hardware</b></a> -</h3> - -<div> - - <p>Any system that can adequately run Visual Studio 2008 is fine. The LLVM - source tree and object files, libraries and executables will consume - approximately 3GB.</p> - -</div> - -<!-- ======================================================================= --> -<h3><a name="software"><b>Software</b></a></h3> -<div> - - <p>You will need Visual Studio 2008 or higher. 
Earlier versions of Visual - Studio have bugs, are not completely compatible, or do not support the C++ - standard well enough.</p> - - <p>You will also need the <a href="http://www.cmake.org/">CMake</a> build - system since it generates the project files you will use to build with.</p> - - <p>If you would like to run the LLVM tests you will need - <a href="http://www.python.org/">Python</a>. Versions 2.4-2.7 are known to - work. You will need <a href="http://gnuwin32.sourceforge.net/">"GnuWin32"</a> - tools, too.</p> - - <p>Do not install the LLVM directory tree into a path containing spaces (e.g. - C:\Documents and Settings\...) as the configure step will fail.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="quickstart"><b>Getting Started</b></a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Here's the short story for getting up and running quickly with LLVM:</p> - -<ol> - <li>Read the documentation.</li> - <li>Seriously, read the documentation.</li> - <li>Remember that you were warned twice about reading the documentation.</li> - - <li>Get the Source Code - <ul> - <li>With the distributed files: - <ol> - <li><tt>cd <i>where-you-want-llvm-to-live</i></tt> - <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt> - <i> or use WinZip</i> - <li><tt>cd llvm</tt></li> - </ol></li> - - <li>With anonymous Subversion access: - <ol> - <li><tt>cd <i>where-you-want-llvm-to-live</i></tt></li> - <li><tt>svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</tt></li> - <li><tt>cd llvm</tt></li> - </ol></li> - </ul></li> - - <li> Use <a href="http://www.cmake.org/">CMake</a> to generate up-to-date - project files: - <ul> - <li>Once CMake is installed then the simplest way is to just start the - CMake GUI, select the directory where you have LLVM extracted to, and the - default options should all be fine. 
One option you may really want to - change, regardless of anything else, might be the CMAKE_INSTALL_PREFIX - setting to select a directory to INSTALL to once compiling is complete, - although installation is not mandatory for using LLVM. Another important - option is LLVM_TARGETS_TO_BUILD, which controls the LLVM target - architectures that are included on the build. - <li>See the <a href="CMake.html">LLVM CMake guide</a> for - detailed information about how to configure the LLVM - build.</li> - </ul> - </li> - - <li>Start Visual Studio - <ul> - <li>In the directory you created the project files will have - an <tt>llvm.sln</tt> file, just double-click on that to open - Visual Studio.</li> - </ul></li> - - <li>Build the LLVM Suite: - <ul> - <li>The projects may still be built individually, but - to build them all do not just select all of them in batch build (as some - are meant as configuration projects), but rather select and build just - the ALL_BUILD project to build everything, or the INSTALL project, which - first builds the ALL_BUILD project, then installs the LLVM headers, libs, - and other useful things to the directory set by the CMAKE_INSTALL_PREFIX - setting when you first configured CMake.</li> - <li>The Fibonacci project is a sample program that uses the JIT. - Modify the project's debugging properties to provide a numeric - command line argument or run it from the command line. The - program will print the corresponding fibonacci value.</li> - </ul></li> - - <li>Test LLVM on Visual Studio: - <ul> - <li>If %PATH% does not contain GnuWin32, you may specify LLVM_LIT_TOOLS_DIR - on CMake for the path to GnuWin32.</li> - <li>You can run LLVM tests by merely building the project - "check". The test results will be shown in the VS output - window.</li> - </ul> - </li> - - <!-- FIXME: Is it up-to-date? 
--> - <li>Test LLVM: - <ul> - <li>The LLVM tests can be run by <tt>cd</tt>ing to the llvm source directory - and running: - -<div class="doc_code"> -<pre> -% llvm-lit test -</pre> -</div> - - <p>Note that quite a few of these test will fail.</p> - </li> - - <li>A specific test or test directory can be run with: - -<div class="doc_code"> -<pre> -% llvm-lit test/path/to/test -</pre> -</div> - </li> - </ul> -</ol> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="tutorial">An Example Using the LLVM Tool Chain</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<ol> - <li><p>First, create a simple C file, name it 'hello.c':</p> - -<div class="doc_code"> -<pre> -#include <stdio.h> -int main() { - printf("hello world\n"); - return 0; -} -</pre></div></li> - - <li><p>Next, compile the C file into a LLVM bitcode file:</p> - -<div class="doc_code"> -<pre> -% clang -c hello.c -emit-llvm -o hello.bc -</pre> -</div> - - <p>This will create the result file <tt>hello.bc</tt> which is the LLVM - bitcode that corresponds the the compiled program and the library - facilities that it required. 
You can execute this file directly using - <tt>lli</tt> tool, compile it to native assembly with the <tt>llc</tt>, - optimize or analyze it further with the <tt>opt</tt> tool, etc.</p> - - <p>Alternatively you can directly output an executable with clang with: - </p> - -<div class="doc_code"> -<pre> -% clang hello.c -o hello.exe -</pre> -</div> - - <p>The <tt>-o hello.exe</tt> is required because clang currently outputs - <tt>a.out</tt> when neither <tt>-o</tt> nor <tt>-c</tt> are given.</p> - - <li><p>Run the program using the just-in-time compiler:</p> - -<div class="doc_code"> -<pre> -% lli hello.bc -</pre> -</div> - - <li><p>Use the <tt>llvm-dis</tt> utility to take a look at the LLVM assembly - code:</p> - -<div class="doc_code"> -<pre> -% llvm-dis < hello.bc | more -</pre> -</div></li> - - <li><p>Compile the program to object code using the LLC code generator:</p> - -<div class="doc_code"> -<pre> -% llc -filetype=obj hello.bc -</pre> -</div></li> - - <li><p>Link to binary using Microsoft link:</p> - -<div class="doc_code"> -<pre> -% link hello.obj -defaultlib:libcmt -</pre> -</div> - - <li><p>Execute the native code program:</p> - -<div class="doc_code"> -<pre> -% hello.exe -</pre> -</div></li> -</ol> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="problems">Common Problems</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If you are having problems building or using LLVM, or if you have any other -general questions about LLVM, please consult the <a href="FAQ.html">Frequently -Asked Questions</a> page.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="links">Links</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>This document is just an <b>introduction</b> to how to use LLVM to do -some simple things... 
there are many more interesting and complicated things -that you can do that aren't documented here (but we'll gladly accept a patch -if you want to write something up!). For more information about LLVM, check -out:</p> - -<ul> - <li><a href="http://llvm.org/">LLVM homepage</a></li> - <li><a href="http://llvm.org/doxygen/">LLVM doxygen tree</a></li> -</ul> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst new file mode 100644 index 0000000000..7d773b7900 --- /dev/null +++ b/docs/GettingStartedVS.rst @@ -0,0 +1,234 @@ +.. _winvs: + +================================================================== +Getting Started with the LLVM System using Microsoft Visual Studio +================================================================== + +.. contents:: + :local: + + +Overview +======== +Welcome to LLVM on Windows! This document only covers LLVM on Windows using +Visual Studio, not mingw or cygwin. In order to get started, you first need to +know some basic information. + +There are many different projects that compose LLVM. The first is the LLVM +suite. This contains all of the tools, libraries, and header files needed to +use LLVM. It contains an assembler, disassembler, +bitcode analyzer and bitcode optimizer. It also contains a test suite that can +be used to test the LLVM tools. + +Another useful project on Windows is `Clang <http://clang.llvm.org/>`_. +Clang is a C family ([Objective]C/C++) compiler. 
Clang mostly works on +Windows, but does not currently understand all of the Microsoft extensions +to C and C++. Because of this, clang cannot parse the C++ standard library +included with Visual Studio, nor parts of the Windows Platform SDK. However, +most standard C programs do compile. Clang can be used to emit bitcode, +directly emit object files or even linked executables using Visual Studio's +``link.exe``. + +The large LLVM test suite cannot be run on the Visual Studio port at this +time. + +Most of the tools build and work. ``bugpoint`` does build, but does +not work. + +Additional information about the LLVM directory structure and tool chain +can be found on the main `Getting Started <GettingStarted.html>`_ page. + + +Requirements +============ +Before you begin to use the LLVM system, review the requirements given +below. This may save you some trouble by knowing ahead of time what hardware +and software you will need. + +Hardware +-------- +Any system that can adequately run Visual Studio 2008 is fine. The LLVM +source tree and object files, libraries and executables will consume +approximately 3GB. + +Software +-------- +You will need Visual Studio 2008 or higher. Earlier versions of Visual +Studio have bugs, are not completely compatible, or do not support the C++ +standard well enough. + +You will also need the `CMake <http://www.cmake.org/>`_ build system since it +generates the project files you will use to build with. + +If you would like to run the LLVM tests you will need `Python +<http://www.python.org/>`_. Versions 2.4-2.7 are known to work. You will need +`GnuWin32 <http://gnuwin32.sourceforge.net/>`_ tools, too. + +Do not install the LLVM directory tree into a path containing spaces (e.g. +``C:\Documents and Settings\...``) as the configure step will fail. + + +Getting Started +=============== +Here's the short story for getting up and running quickly with LLVM: + +1. Read the documentation. +2. Seriously, read the documentation. +3. 
Remember that you were warned twice about reading the documentation. +4. Get the Source Code + + * With the distributed files: + + 1. ``cd <where-you-want-llvm-to-live>`` + 2. ``gunzip --stdout llvm-VERSION.tar.gz | tar -xvf -`` + (*or use WinZip*) + 3. ``cd llvm`` + + * With anonymous Subversion access: + + 1. ``cd <where-you-want-llvm-to-live>`` + 2. ``svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm`` + 3. ``cd llvm`` + +5. Use `CMake <http://www.cmake.org/>`_ to generate up-to-date project files: + + * Once CMake is installed then the simplest way is to just start the + CMake GUI, select the directory where you have LLVM extracted to, and + the default options should all be fine. One option you may really + want to change, regardless of anything else, might be the + ``CMAKE_INSTALL_PREFIX`` setting to select a directory to INSTALL to + once compiling is complete, although installation is not mandatory for + using LLVM. Another important option is ``LLVM_TARGETS_TO_BUILD``, + which controls the LLVM target architectures that are included on the + build. + * See the `LLVM CMake guide <CMake.html>`_ for detailed information about + how to configure the LLVM build. + +6. Start Visual Studio + + * In the directory you created the project files will have an ``llvm.sln`` + file, just double-click on that to open Visual Studio. + +7. Build the LLVM Suite: + + * The projects may still be built individually, but to build them all do + not just select all of them in batch build (as some are meant as + configuration projects), but rather select and build just the + ``ALL_BUILD`` project to build everything, or the ``INSTALL`` project, + which first builds the ``ALL_BUILD`` project, then installs the LLVM + headers, libs, and other useful things to the directory set by the + ``CMAKE_INSTALL_PREFIX`` setting when you first configured CMake. + * The Fibonacci project is a sample program that uses the JIT. 
Modify the + project's debugging properties to provide a numeric command line argument + or run it from the command line. The program will print the + corresponding fibonacci value. + +8. Test LLVM on Visual Studio: + + * If ``%PATH%`` does not contain GnuWin32, you may specify + ``LLVM_LIT_TOOLS_DIR`` on CMake for the path to GnuWin32. + * You can run LLVM tests by merely building the project "check". The test + results will be shown in the VS output window. + +.. FIXME: Is it up-to-date? + +9. Test LLVM: + + * The LLVM tests can be run by changing directory to the llvm source + directory and running: + + .. code-block:: bat + + C:\..\llvm> llvm-lit test + + Note that quite a few of these test will fail. + + A specific test or test directory can be run with: + + .. code-block:: bat + + C:\..\llvm> llvm-lit test/path/to/test + + +An Example Using the LLVM Tool Chain +==================================== + +1. First, create a simple C file, name it '``hello.c``': + + .. code-block:: c + + #include <stdio.h> + int main() { + printf("hello world\n"); + return 0; + } + +2. Next, compile the C file into a LLVM bitcode file: + + .. code-block:: bat + + C:\..> clang -c hello.c -emit-llvm -o hello.bc + + This will create the result file ``hello.bc`` which is the LLVM bitcode + that corresponds the the compiled program and the library facilities that + it required. You can execute this file directly using ``lli`` tool, + compile it to native assembly with the ``llc``, optimize or analyze it + further with the ``opt`` tool, etc. + + Alternatively you can directly output an executable with clang with: + + .. code-block:: bat + + C:\..> clang hello.c -o hello.exe + + The ``-o hello.exe`` is required because clang currently outputs ``a.out`` + when neither ``-o`` nor ``-c`` are given. + +3. Run the program using the just-in-time compiler: + + .. code-block:: bat + + C:\..> lli hello.bc + +4. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code: + + .. 
code-block:: bat + + C:\..> llvm-dis < hello.bc | more + +5. Compile the program to object code using the LLC code generator: + + .. code-block:: bat + + C:\..> llc -filetype=obj hello.bc + +6. Link to binary using Microsoft link: + + .. code-block:: bat + + C:\..> link hello.obj -defaultlib:libcmt + +7. Execute the native code program: + + .. code-block:: bat + + C:\..> hello.exe + + +Common Problems +=============== +If you are having problems building or using LLVM, or if you have any other +general questions about LLVM, please consult the `Frequently Asked Questions +<FAQ.html>`_ page. + + +Links +===== +This document is just an **introduction** to how to use LLVM to do some simple +things... there are many more interesting and complicated things that you can +do that aren't documented here (but we'll gladly accept a patch if you want to +write something up!). For more information about LLVM, check out: + +* `LLVM homepage <http://llvm.org/>`_ +* `LLVM doxygen tree <http://llvm.org/doxygen/>`_ + diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html index 0fa8329921..39f8385129 100644 --- a/docs/HowToSubmitABug.html +++ b/docs/HowToSubmitABug.html @@ -223,12 +223,12 @@ we have chased down ended up being bugs in the program being compiled, not LLVM.</p> <p>Once you determine that the program itself is not buggy, you should choose -which code generator you wish to compile the program with (e.g. C backend, the -JIT, or LLC) and optionally a series of LLVM passes to run. For example:</p> +which code generator you wish to compile the program with (e.g. LLC or the JIT) +and optionally a series of LLVM passes to run. For example:</p> <div class="doc_code"> <p><tt> -<b>bugpoint</b> -run-cbe [... optzn passes ...] file-to-test.bc --args -- [program arguments]</tt></p> +<b>bugpoint</b> -run-llc [... optzn passes ...] 
file-to-test.bc --args -- [program arguments]</tt></p> </div> <p><tt>bugpoint</tt> will try to narrow down your list of passes to the one pass diff --git a/docs/LangRef.html b/docs/LangRef.html index f13f13909b..ba653dbd49 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -838,9 +838,32 @@ define i32 @main() { <i>; i32()* </i> <p>Global variables define regions of memory allocated at compilation time instead of run-time. Global variables may optionally be initialized, may have an explicit section to be placed in, and may have an optional explicit - alignment specified. A variable may be defined as "thread_local", which + alignment specified.</p> + +<p>A variable may be defined as <tt>thread_local</tt>, which means that it will not be shared by threads (each thread will have a - separated copy of the variable). A variable may be defined as a global + separated copy of the variable). Not all targets support thread-local + variables. Optionally, a TLS model may be specified:</p> + +<dl> + <dt><b><tt>localdynamic</tt></b>:</dt> + <dd>For variables that are only used within the current shared library.</dd> + + <dt><b><tt>initialexec</tt></b>:</dt> + <dd>For variables in modules that will not be loaded dynamically.</dd> + + <dt><b><tt>localexec</tt></b>:</dt> + <dd>For variables defined in the executable and only used within it.</dd> +</dl> + +<p>The models correspond to the ELF TLS models; see + <a href="http://people.redhat.com/drepper/tls.pdf">ELF + Handling For Thread-Local Storage</a> for more information on under which + circumstances the different models may be used. The target may choose a + different TLS model if the specified model is not supported, or if a better + choice of model can be made.</p> + +<p>A variable may be defined as a global "constant," which indicates that the contents of the variable will <b>never</b> be modified (enabling better optimization, allowing the global data to be placed in the read-only section of an executable, etc). 
@@ -893,6 +916,13 @@ define i32 @main() { <i>; i32()* </i> @G = addrspace(5) constant float 1.0, section "foo", align 4 </pre> +<p>The following example defines a thread-local global with + the <tt>initialexec</tt> TLS model:</p> + +<pre class="doc_code"> +@G = thread_local(initialexec) global i32 0, align 4 +</pre> + </div> @@ -4739,7 +4769,7 @@ IfUnequal: <h5>Arguments:</h5> <p>The first two operands of a '<tt>shufflevector</tt>' instruction are vectors - with types that match each other. The third argument is a shuffle mask whose + with the same type. The third argument is a shuffle mask whose element type is always 'i32'. The result of the instruction is a vector whose length is the same as the shuffle mask and whose element type is the same as the element type of the first two operands.</p> diff --git a/docs/Lexicon.html b/docs/Lexicon.html deleted file mode 100644 index 60d90167c9..0000000000 --- a/docs/Lexicon.html +++ /dev/null @@ -1,294 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>The LLVM Lexicon</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> - <meta name="author" content="Various"> - <meta name="description" - content="A glossary of terms used with the LLVM project."> -</head> -<body> -<h1>The LLVM Lexicon</h1> -<p class="doc_warning">NOTE: This document is a work in progress!</p> -<!-- *********************************************************************** --> -<h2>Table Of Contents</h2> -<!-- *********************************************************************** --> -<div> - <table> - <tr><th colspan="8"><b>- <a href="#A">A</a> -</b></th></tr> - <tr> - <td><a href="#ADCE">ADCE</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#B">B</a> -</b></th></tr> - <tr> - <td><a href="#BURS">BURS</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#C">C</a> -</b></th></tr> - <tr> - 
<td><a href="#CSE">CSE</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#D">D</a> -</b></th></tr> - <tr> - <td><a href="#DAG">DAG</a></td> - <td><a href="#Derived_Pointer">Derived Pointer</a></td> - <td><a href="#DSA">DSA</a></td> - <td><a href="#DSE">DSE</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#F">F</a> -</b></th></tr> - <tr> - <td><a href="#FCA">FCA</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#G">G</a> -</b></th></tr> - <tr> - <td><a href="#GC">GC</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#I">I</a> -</b></th></tr> - <tr> - <td><a href="#IPA">IPA</a></td> - <td><a href="#IPO">IPO</a></td> - <td><a href="#ISel">ISel</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#L">L</a> -</b></th></tr> - <tr> - <td><a href="#LCSSA">LCSSA</a></td> - <td><a href="#LICM">LICM</a></td> - <td><a href="#Load-VN">Load-VN</a></td> - <td><a href="#LTO">LTO</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#M">M</a> -</b></th></tr> - <tr> - <td><a href="#MC">MC</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#O">O</a> -</b></th></tr> - <tr> - <td><a href="#Object_Pointer">Object Pointer</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#P">P</a> -</b></th></tr> - <tr> - <td><a href="#PRE">PRE</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#R">R</a> -</b></th></tr> - <tr> - <td><a href="#RAUW">RAUW</a></td> - <td><a href="#Reassociation">Reassociation</a></td> - <td><a href="#Root">Root</a></td> - <td><a href="#RPO">RPO</a></td> - </tr> - <tr><th colspan="8"><b>- <a href="#S">S</a> -</b></th></tr> - <tr> - <td><a href="#Safe_Point">Safe Point</a></td> - <td><a href="#SCC">SCC</a></td> - <td><a href="#SCCP">SCCP</a></td> - <td><a href="#SDISel">SDISel</a></td> - <td><a href="#SRoA">SRoA</a></td> - <td><a href="#Stack_Map">Stack Map</a></td> - </tr> - </table> -</div> - -<!-- *********************************************************************** --> -<h2>Definitions</h2> -<!-- 
*********************************************************************** --> -<div> -<!-- _______________________________________________________________________ --> -<h3><a name="A">- A -</a></h3> -<div> - <dl> - <dt><a name="ADCE"><b>ADCE</b></a></dt> - <dd>Aggressive Dead Code Elimination</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="B">- B -</a></h3> -<div> - <dl> - <dt><a name="BURS"><b>BURS</b></a></dt> - <dd>Bottom Up Rewriting System—A method of instruction selection for - code generation. An example is the <a -href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="C">- C -</a></h3> -<div> - <dl> - <dt><a name="CSE"><b>CSE</b></a></dt> - <dd>Common Subexpression Elimination. An optimization that removes common - subexpression compuation. For example <tt>(a+b)*(a+b)</tt> has two - subexpressions that are the same: <tt>(a+b)</tt>. This optimization would - perform the addition only once and then perform the multiply (but only if - it's compulationally correct/safe). - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="D">- D -</a></h3> -<div> - <dl> - <dt><a name="DAG"><b>DAG</b></a></dt> - <dd>Directed Acyclic Graph</dd> - <dt><a name="Derived_Pointer"><b>Derived Pointer</b></a></dt> - <dd>A pointer to the interior of an object, such that a garbage collector - is unable to use the pointer for reachability analysis. While a derived - pointer is live, the corresponding object pointer must be kept in a root, - otherwise the collector might free the referenced object. With copying - collectors, derived pointers pose an additional hazard that they may be - invalidated at any <a href="Safe_Point">safe point</a>. 
This term is used in - opposition to <a href="#Object_Pointer">object pointer</a>.</dd> - <dt><a name="DSA"><b>DSA</b></a></dt> - <dd>Data Structure Analysis</dd> - <dt><a name="DSE"><b>DSE</b></a></dt> - <dd>Dead Store Elimination</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="F">- F -</a></h3> -<div> - <dl> - <dt><a name="FCA"><b>FCA</b></a></dt> - <dd>First Class Aggregate</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="G">- G -</a></h3> -<div> - <dl> - <dt><a name="GC"><b>GC</b></a></dt> - <dd>Garbage Collection. The practice of using reachability analysis instead - of explicit memory management to reclaim unused memory.</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="H">- H -</a></h3> -<div> - <dl> - <dt><a name="Heap"><b>Heap</b></a></dt> - <dd>In garbage collection, the region of memory which is managed using - reachability analysis.</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="I">- I -</a></h3> -<div> - <dl> - <dt><a name="IPA"><b>IPA</b></a></dt> - <dd>Inter-Procedural Analysis. Refers to any variety of code analysis that - occurs between procedures, functions or compilation units (modules).</dd> - <dt><a name="IPO"><b>IPO</b></a></dt> - <dd>Inter-Procedural Optimization. 
Refers to any variety of code - optimization that occurs between procedures, functions or compilation units - (modules).</dd> - <dt><a name="ISel"><b>ISel</b></a></dt> - <dd>Instruction Selection.</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="L">- L -</a></h3> -<div> - <dl> - <dt><a name="LCSSA"><b>LCSSA</b></a></dt> - <dd>Loop-Closed Static Single Assignment Form</dd> - <dt><a name="LICM"><b>LICM</b></a></dt> - <dd>Loop Invariant Code Motion</dd> - <dt><a name="Load-VN"><b>Load-VN</b></a></dt> - <dd>Load Value Numbering</dd> - <dt><a name="LTO"><b>LTO</b></a></dt> - <dd>Link-Time Optimization</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="M">- M -</a></h3> -<div> - <dl> - <dt><a name="MC"><b>MC</b></a></dt> - <dd>Machine Code</dd> - </dl> -</div> -<!-- _______________________________________________________________________ --> -<h3><a name="O">- O -</a></h3> -<div> - <dl> - <dt><a name="Object_Pointer"><b>Object Pointer</b></a></dt> - <dd>A pointer to an object such that the garbage collector is able to trace - references contained within the object. This term is used in opposition to - <a href="#Derived_Pointer">derived pointer</a>.</dd> - </dl> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="P">- P -</a></h3> -<div> - <dl> - <dt><a name="PRE"><b>PRE</b></a></dt> - <dd>Partial Redundancy Elimination</dd> - </dl> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="R">- R -</a></h3> -<div> - <dl> - <dt><a name="RAUW"><b>RAUW</b></a></dt> <dd>An abbreviation for Replace - All Uses With. 
The functions User::replaceUsesOfWith(), - Value::replaceAllUsesWith(), and Constant::replaceUsesOfWithOnConstant() - implement the replacement of one Value with another by iterating over its - def/use chain and fixing up all of the pointers to point to the new value. - See also <a href="ProgrammersManual.html#iterate_chains">def/use chains</a>. - </dd> - <dt><a name="Reassociation"><b>Reassociation</b></a></dt> <dd>Rearranging - associative expressions to promote better redundancy elimination and other - optimization. For example, changing (A+B-A) into (B+A-A), permitting it to - be optimized into (B+0) then (B).</dd> - <dt><a name="Root"><b>Root</b></a></dt> <dd>In garbage collection, a - pointer variable lying outside of the <a href="#Heap">heap</a> from which - the collector begins its reachability analysis. In the context of code - generation, "root" almost always refers to a "stack root" -- a local or - temporary variable within an executing function.</dd> - <dt><a name="RPO"><b>RPO</b></a></dt> <dd>Reverse postorder</dd> - </dl> -</div> - -<!-- _______________________________________________________________________ --> -<h3><a name="S">- S -</a></h3> -<div> - <dl> - <dt><a name="Safe_Point"><b>Safe Point</b></a></dt> - <dd>In garbage collection, it is necessary to identify <a href="#Root">stack - roots</a> so that reachability analysis may proceed. It may be infeasible to - provide this information for every instruction, so instead the information - may is calculated only at designated safe points. 
With a copying collector, - <a href="#Derived_Pointers">derived pointers</a> must not be retained across - safe points and <a href="#Object_Pointers">object pointers</a> must be - reloaded from stack roots.</dd> - <dt><a name="SDISel"><b>SDISel</b></a></dt> - <dd>Selection DAG Instruction Selection.</dd> - <dt><a name="SCC"><b>SCC</b></a></dt> - <dd>Strongly Connected Component</dd> - <dt><a name="SCCP"><b>SCCP</b></a></dt> - <dd>Sparse Conditional Constant Propagation</dd> - <dt><a name="SRoA"><b>SRoA</b></a></dt> - <dd>Scalar Replacement of Aggregates</dd> - <dt><a name="SSA"><b>SSA</b></a></dt> - <dd>Static Single Assignment</dd> - <dt><a name="Stack_Map"><b>Stack Map</b></a></dt> - <dd>In garbage collection, metadata emitted by the code generator which - identifies <a href="#Root">roots</a> within the stack frame of an executing - function.</dd> - </dl> -</div> - -</div> -<!-- *********************************************************************** --> -<hr> -<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a><a - href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a><a - href="http://llvm.org/">The LLVM Team</a><br> -<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> -Last modified: $Date$ -</address> -<!-- vim: sw=2 ---> -</body> -</html> diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst new file mode 100644 index 0000000000..6ebe61429f --- /dev/null +++ b/docs/Lexicon.rst @@ -0,0 +1,194 @@ +.. _lexicon: + +================ +The LLVM Lexicon +================ + +.. note:: + + This document is a work in progress! + +Definitions +=========== + +A +- + +**ADCE** + Aggressive Dead Code Elimination + +B +- + +**BURS** + + Bottom Up Rewriting System --- A method of instruction selection for code + generation. 
An example is the `BURG + <http://www.program-transformation.org/Transform/BURG>`_ tool. + +C +- + +**CSE** + Common Subexpression Elimination. An optimization that removes common + subexpression compuation. For example ``(a+b)*(a+b)`` has two subexpressions + that are the same: ``(a+b)``. This optimization would perform the addition + only once and then perform the multiply (but only if it's compulationally + correct/safe). + +D +- + +**DAG** + Directed Acyclic Graph + +.. _derived pointer: +.. _derived pointers: + +**Derived Pointer** + A pointer to the interior of an object, such that a garbage collector is + unable to use the pointer for reachability analysis. While a derived pointer + is live, the corresponding object pointer must be kept in a root, otherwise + the collector might free the referenced object. With copying collectors, + derived pointers pose an additional hazard that they may be invalidated at + any `safe point`_. This term is used in opposition to `object pointer`_. + +**DSA** + Data Structure Analysis + +**DSE** + Dead Store Elimination + +F +- + +**FCA** + First Class Aggregate + +G +- + +**GC** + Garbage Collection. The practice of using reachability analysis instead of + explicit memory management to reclaim unused memory. + +H +- + +.. _heap: + +**Heap** + In garbage collection, the region of memory which is managed using + reachability analysis. + +I +- + +**IPA** + Inter-Procedural Analysis. Refers to any variety of code analysis that + occurs between procedures, functions or compilation units (modules). + +**IPO** + Inter-Procedural Optimization. Refers to any variety of code optimization + that occurs between procedures, functions or compilation units (modules). + +**ISel** + Instruction Selection + +L +- + +**LCSSA** + Loop-Closed Static Single Assignment Form + +**LICM** + Loop Invariant Code Motion + +**Load-VN** + Load Value Numbering + +**LTO** + Link-Time Optimization + +M +- + +**MC** + Machine Code + +O +- +.. 
_object pointer: +.. _object pointers: + +**Object Pointer** + A pointer to an object such that the garbage collector is able to trace + references contained within the object. This term is used in opposition to + `derived pointer`_. + +P +- + +**PRE** + Partial Redundancy Elimination + +R +- + +**RAUW** + + Replace All Uses With. The functions ``User::replaceUsesOfWith()``, + ``Value::replaceAllUsesWith()``, and + ``Constant::replaceUsesOfWithOnConstant()`` implement the replacement of one + Value with another by iterating over its def/use chain and fixing up all of + the pointers to point to the new value. See + also `def/use chains <ProgrammersManual.html#iterate_chains>`_. + +**Reassociation** + Rearranging associative expressions to promote better redundancy elimination + and other optimization. For example, changing ``(A+B-A)`` into ``(B+A-A)``, + permitting it to be optimized into ``(B+0)`` then ``(B)``. + +.. _roots: +.. _stack roots: + +**Root** + In garbage collection, a pointer variable lying outside of the `heap`_ from + which the collector begins its reachability analysis. In the context of code + generation, "root" almost always refers to a "stack root" --- a local or + temporary variable within an executing function.</dd> + +**RPO** + Reverse postorder + +S +- + +.. _safe point: + +**Safe Point** + In garbage collection, it is necessary to identify `stack roots`_ so that + reachability analysis may proceed. It may be infeasible to provide this + information for every instruction, so instead the information may is + calculated only at designated safe points. With a copying collector, + `derived pointers`_ must not be retained across safe points and `object + pointers`_ must be reloaded from stack roots. + +**SDISel** + Selection DAG Instruction Selection. 
+ +**SCC** + Strongly Connected Component + +**SCCP** + Sparse Conditional Constant Propagation + +**SRoA** + Scalar Replacement of Aggregates + +**SSA** + Static Single Assignment + +**Stack Map** + In garbage collection, metadata emitted by the code generator which + identifies `roots`_ within the stack frame of an executing function. diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html deleted file mode 100644 index 8063fa8510..0000000000 --- a/docs/LinkTimeOptimization.html +++ /dev/null @@ -1,401 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Link Time Optimization: Design and Implementation</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> - -<h1> - LLVM Link Time Optimization: Design and Implementation -</h1> - -<ul> - <li><a href="#desc">Description</a></li> - <li><a href="#design">Design Philosophy</a> - <ul> - <li><a href="#example1">Example of link time optimization</a></li> - <li><a href="#alternative_approaches">Alternative Approaches</a></li> - </ul></li> - <li><a href="#multiphase">Multi-phase communication between LLVM and linker</a> - <ul> - <li><a href="#phase1">Phase 1 : Read LLVM Bitcode Files</a></li> - <li><a href="#phase2">Phase 2 : Symbol Resolution</a></li> - <li><a href="#phase3">Phase 3 : Optimize Bitcode Files</a></li> - <li><a href="#phase4">Phase 4 : Symbol Resolution after optimization</a></li> - </ul></li> - <li><a href="#lto">libLTO</a> - <ul> - <li><a href="#lto_module_t">lto_module_t</a></li> - <li><a href="#lto_code_gen_t">lto_code_gen_t</a></li> - </ul> -</ul> - -<div class="doc_author"> -<p>Written by Devang Patel and Nick Kledzik</p> -</div> - -<!-- *********************************************************************** --> -<h2> -<a name="desc">Description</a> -</h2> -<!-- 
*********************************************************************** --> - -<div> -<p> -LLVM features powerful intermodular optimizations which can be used at link -time. Link Time Optimization (LTO) is another name for intermodular optimization -when performed during the link stage. This document describes the interface -and design between the LTO optimizer and the linker.</p> -</div> - -<!-- *********************************************************************** --> -<h2> -<a name="design">Design Philosophy</a> -</h2> -<!-- *********************************************************************** --> - -<div> -<p> -The LLVM Link Time Optimizer provides complete transparency, while doing -intermodular optimization, in the compiler tool chain. Its main goal is to let -the developer take advantage of intermodular optimizations without making any -significant changes to the developer's makefiles or build system. This is -achieved through tight integration with the linker. In this model, the linker -treates LLVM bitcode files like native object files and allows mixing and -matching among them. The linker uses <a href="#lto">libLTO</a>, a shared -object, to handle LLVM bitcode files. This tight integration between -the linker and LLVM optimizer helps to do optimizations that are not possible -in other models. The linker input allows the optimizer to avoid relying on -conservative escape analysis. -</p> - -<!-- ======================================================================= --> -<h3> - <a name="example1">Example of link time optimization</a> -</h3> - -<div> - <p>The following example illustrates the advantages of LTO's integrated - approach and clean interface. This example requires a system linker which - supports LTO through the interface described in this document. Here, - clang transparently invokes system linker. </p> - <ul> - <li> Input source file <tt>a.c</tt> is compiled into LLVM bitcode form. 
- <li> Input source file <tt>main.c</tt> is compiled into native object code. - </ul> -<pre class="doc_code"> ---- a.h --- -extern int foo1(void); -extern void foo2(void); -extern void foo4(void); - ---- a.c --- -#include "a.h" - -static signed int i = 0; - -void foo2(void) { - i = -1; -} - -static int foo3() { - foo4(); - return 10; -} - -int foo1(void) { - int data = 0; - - if (i < 0) - data = foo3(); - - data = data + 42; - return data; -} - ---- main.c --- -#include <stdio.h> -#include "a.h" - -void foo4(void) { - printf("Hi\n"); -} - -int main() { - return foo1(); -} - ---- command lines --- -$ clang -emit-llvm -c a.c -o a.o # <-- a.o is LLVM bitcode file -$ clang -c main.c -o main.o # <-- main.o is native object file -$ clang a.o main.o -o main # <-- standard link command without any modifications -</pre> - -<ul> - <li>In this example, the linker recognizes that <tt>foo2()</tt> is an - externally visible symbol defined in LLVM bitcode file. The linker - completes its usual symbol resolution pass and finds that <tt>foo2()</tt> - is not used anywhere. This information is used by the LLVM optimizer and - it removes <tt>foo2()</tt>.</li> - <li>As soon as <tt>foo2()</tt> is removed, the optimizer recognizes that condition - <tt>i < 0</tt> is always false, which means <tt>foo3()</tt> is never - used. Hence, the optimizer also removes <tt>foo3()</tt>.</li> - <li>And this in turn, enables linker to remove <tt>foo4()</tt>.</li> -</ul> - -<p>This example illustrates the advantage of tight integration with the - linker. 
Here, the optimizer can not remove <tt>foo3()</tt> without the - linker's input.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="alternative_approaches">Alternative Approaches</a> -</h3> - -<div> - <dl> - <dt><b>Compiler driver invokes link time optimizer separately.</b></dt> - <dd>In this model the link time optimizer is not able to take advantage of - information collected during the linker's normal symbol resolution phase. - In the above example, the optimizer can not remove <tt>foo2()</tt> without - the linker's input because it is externally visible. This in turn prohibits - the optimizer from removing <tt>foo3()</tt>.</dd> - <dt><b>Use separate tool to collect symbol information from all object - files.</b></dt> - <dd>In this model, a new, separate, tool or library replicates the linker's - capability to collect information for link time optimization. Not only is - this code duplication difficult to justify, but it also has several other - disadvantages. For example, the linking semantics and the features - provided by the linker on various platform are not unique. This means, - this new tool needs to support all such features and platforms in one - super tool or a separate tool per platform is required. This increases - maintenance cost for link time optimizer significantly, which is not - necessary. This approach also requires staying synchronized with linker - developements on various platforms, which is not the main focus of the link - time optimizer. Finally, this approach increases end user's build time due - to the duplication of work done by this separate tool and the linker itself. 
- </dd> - </dl> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="multiphase">Multi-phase communication between libLTO and linker</a> -</h2> - -<div> - <p>The linker collects information about symbol defininitions and uses in - various link objects which is more accurate than any information collected - by other tools during typical build cycles. The linker collects this - information by looking at the definitions and uses of symbols in native .o - files and using symbol visibility information. The linker also uses - user-supplied information, such as a list of exported symbols. LLVM - optimizer collects control flow information, data flow information and knows - much more about program structure from the optimizer's point of view. - Our goal is to take advantage of tight integration between the linker and - the optimizer by sharing this information during various linking phases. -</p> - -<!-- ======================================================================= --> -<h3> - <a name="phase1">Phase 1 : Read LLVM Bitcode Files</a> -</h3> - -<div> - <p>The linker first reads all object files in natural order and collects - symbol information. This includes native object files as well as LLVM bitcode - files. To minimize the cost to the linker in the case that all .o files - are native object files, the linker only calls <tt>lto_module_create()</tt> - when a supplied object file is found to not be a native object file. If - <tt>lto_module_create()</tt> returns that the file is an LLVM bitcode file, - the linker - then iterates over the module using <tt>lto_module_get_symbol_name()</tt> and - <tt>lto_module_get_symbol_attribute()</tt> to get all symbols defined and - referenced. - This information is added to the linker's global symbol table. -</p> - <p>The lto* functions are all implemented in a shared object libLTO. This - allows the LLVM LTO code to be updated independently of the linker tool. 
- On platforms that support it, the shared object is lazily loaded. -</p> -</div> - -<!-- ======================================================================= --> -<h3> - <a name="phase2">Phase 2 : Symbol Resolution</a> -</h3> - -<div> - <p>In this stage, the linker resolves symbols using global symbol table. - It may report undefined symbol errors, read archive members, replace - weak symbols, etc. The linker is able to do this seamlessly even though it - does not know the exact content of input LLVM bitcode files. If dead code - stripping is enabled then the linker collects the list of live symbols. - </p> -</div> - -<!-- ======================================================================= --> -<h3> - <a name="phase3">Phase 3 : Optimize Bitcode Files</a> -</h3> -<div> - <p>After symbol resolution, the linker tells the LTO shared object which - symbols are needed by native object files. In the example above, the linker - reports that only <tt>foo1()</tt> is used by native object files using - <tt>lto_codegen_add_must_preserve_symbol()</tt>. Next the linker invokes - the LLVM optimizer and code generators using <tt>lto_codegen_compile()</tt> - which returns a native object file creating by merging the LLVM bitcode files - and applying various optimization passes. -</p> -</div> - -<!-- ======================================================================= --> -<h3> - <a name="phase4">Phase 4 : Symbol Resolution after optimization</a> -</h3> - -<div> - <p>In this phase, the linker reads optimized a native object file and - updates the internal global symbol table to reflect any changes. The linker - also collects information about any changes in use of external symbols by - LLVM bitcode files. In the example above, the linker notes that - <tt>foo4()</tt> is not used any more. 
If dead code stripping is enabled then - the linker refreshes the live symbol information appropriately and performs - dead code stripping.</p> - <p>After this phase, the linker continues linking as if it never saw LLVM - bitcode files.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> -<a name="lto">libLTO</a> -</h2> - -<div> - <p><tt>libLTO</tt> is a shared object that is part of the LLVM tools, and - is intended for use by a linker. <tt>libLTO</tt> provides an abstract C - interface to use the LLVM interprocedural optimizer without exposing details - of LLVM's internals. The intention is to keep the interface as stable as - possible even when the LLVM optimizer continues to evolve. It should even - be possible for a completely different compilation technology to provide - a different libLTO that works with their object files and the standard - linker tool.</p> - -<!-- ======================================================================= --> -<h3> - <a name="lto_module_t">lto_module_t</a> -</h3> - -<div> - -<p>A non-native object file is handled via an <tt>lto_module_t</tt>. 
-The following functions allow the linker to check if a file (on disk -or in a memory buffer) is a file which libLTO can process:</p> - -<pre class="doc_code"> -lto_module_is_object_file(const char*) -lto_module_is_object_file_for_target(const char*, const char*) -lto_module_is_object_file_in_memory(const void*, size_t) -lto_module_is_object_file_in_memory_for_target(const void*, size_t, const char*) -</pre> - -<p>If the object file can be processed by libLTO, the linker creates a -<tt>lto_module_t</tt> by using one of</p> - -<pre class="doc_code"> -lto_module_create(const char*) -lto_module_create_from_memory(const void*, size_t) -</pre> - -<p>and when done, the handle is released via</p> - -<pre class="doc_code"> -lto_module_dispose(lto_module_t) -</pre> - -<p>The linker can introspect the non-native object file by getting the number of -symbols and getting the name and attributes of each symbol via:</p> - -<pre class="doc_code"> -lto_module_get_num_symbols(lto_module_t) -lto_module_get_symbol_name(lto_module_t, unsigned int) -lto_module_get_symbol_attribute(lto_module_t, unsigned int) -</pre> - -<p>The attributes of a symbol include the alignment, visibility, and kind.</p> -</div> - -<!-- ======================================================================= --> -<h3> - <a name="lto_code_gen_t">lto_code_gen_t</a> -</h3> - -<div> - -<p>Once the linker has loaded each non-native object files into an -<tt>lto_module_t</tt>, it can request libLTO to process them all and -generate a native object file. This is done in a couple of steps. -First, a code generator is created with:</p> - -<pre class="doc_code">lto_codegen_create()</pre> - -<p>Then, each non-native object file is added to the code generator with:</p> - -<pre class="doc_code"> -lto_codegen_add_module(lto_code_gen_t, lto_module_t) -</pre> - -<p>The linker then has the option of setting some codegen options. 
Whether or -not to generate DWARF debug info is set with:</p> - -<pre class="doc_code">lto_codegen_set_debug_model(lto_code_gen_t)</pre> - -<p>Which kind of position independence is set with:</p> - -<pre class="doc_code">lto_codegen_set_pic_model(lto_code_gen_t) </pre> - -<p>And each symbol that is referenced by a native object file or otherwise must -not be optimized away is set with:</p> - -<pre class="doc_code"> -lto_codegen_add_must_preserve_symbol(lto_code_gen_t, const char*) -</pre> - -<p>After all these settings are done, the linker requests that a native object -file be created from the modules with the settings using:</p> - -<pre class="doc_code">lto_codegen_compile(lto_code_gen_t, size*)</pre> - -<p>which returns a pointer to a buffer containing the generated native -object file. The linker then parses that and links it with the rest -of the native object files.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - Devang Patel and Nick Kledzik<br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> - diff --git a/docs/LinkTimeOptimization.rst b/docs/LinkTimeOptimization.rst new file mode 100644 index 0000000000..53d673e406 --- /dev/null +++ b/docs/LinkTimeOptimization.rst @@ -0,0 +1,298 @@ +.. _lto: + +====================================================== +LLVM Link Time Optimization: Design and Implementation +====================================================== + +.. contents:: + :local: + +Description +=========== + +LLVM features powerful intermodular optimizations which can be used at link +time. 
Link Time Optimization (LTO) is another name for intermodular
+optimization when performed during the link stage. This document describes the
+interface and design between the LTO optimizer and the linker.
+
+Design Philosophy
+=================
+
+The LLVM Link Time Optimizer provides complete transparency, while doing
+intermodular optimization, in the compiler tool chain. Its main goal is to let
+the developer take advantage of intermodular optimizations without making any
+significant changes to the developer's makefiles or build system. This is
+achieved through tight integration with the linker. In this model, the linker
+treats LLVM bitcode files like native object files and allows mixing and
+matching among them. The linker uses `libLTO`_, a shared object, to handle LLVM
+bitcode files. This tight integration between the linker and LLVM optimizer
+helps to do optimizations that are not possible in other models. The linker
+input allows the optimizer to avoid relying on conservative escape analysis.
+
+Example of link time optimization
+---------------------------------
+
+The following example illustrates the advantages of LTO's integrated approach
+and clean interface. This example requires a system linker which supports LTO
+through the interface described in this document. Here, clang transparently
+invokes the system linker.
+
+* Input source file ``a.c`` is compiled into LLVM bitcode form.
+* Input source file ``main.c`` is compiled into native object code.
+
+.. 
code-block:: c++
+
+  --- a.h ---
+  extern int foo1(void);
+  extern void foo2(void);
+  extern void foo4(void);
+
+  --- a.c ---
+  #include "a.h"
+
+  static signed int i = 0;
+
+  void foo2(void) {
+    i = -1;
+  }
+
+  static int foo3() {
+    foo4();
+    return 10;
+  }
+
+  int foo1(void) {
+    int data = 0;
+
+    if (i < 0)
+      data = foo3();
+
+    data = data + 42;
+    return data;
+  }
+
+  --- main.c ---
+  #include <stdio.h>
+  #include "a.h"
+
+  void foo4(void) {
+    printf("Hi\n");
+  }
+
+  int main() {
+    return foo1();
+  }
+
+.. code-block:: bash
+
+  --- command lines ---
+  % clang -emit-llvm -c a.c -o a.o   # <-- a.o is LLVM bitcode file
+  % clang -c main.c -o main.o        # <-- main.o is native object file
+  % clang a.o main.o -o main         # <-- standard link command without modifications
+
+* In this example, the linker recognizes that ``foo2()`` is an externally
+  visible symbol defined in LLVM bitcode file. The linker completes its usual
+  symbol resolution pass and finds that ``foo2()`` is not used
+  anywhere. This information is used by the LLVM optimizer and it
+  removes ``foo2()``.
+
+* As soon as ``foo2()`` is removed, the optimizer recognizes that condition ``i
+  < 0`` is always false, which means ``foo3()`` is never used. Hence, the
+  optimizer also removes ``foo3()``.
+
+* And this in turn, enables the linker to remove ``foo4()``.
+
+This example illustrates the advantage of tight integration with the
+linker. Here, the optimizer can not remove ``foo3()`` without the linker's
+input.
+
+Alternative Approaches
+----------------------
+
+**Compiler driver invokes link time optimizer separately.**
+  In this model the link time optimizer is not able to take advantage of
+  information collected during the linker's normal symbol resolution phase.
+  In the above example, the optimizer can not remove ``foo2()`` without the
+  linker's input because it is externally visible. This in turn prohibits the
+  optimizer from removing ``foo3()``. 
+
+**Use separate tool to collect symbol information from all object files.**
+  In this model, a new, separate, tool or library replicates the linker's
+  capability to collect information for link time optimization. Not only is
+  this code duplication difficult to justify, but it also has several other
+  disadvantages. For example, the linking semantics and the features provided
+  by the linker on various platforms are not unique. This means, this new tool
+  needs to support all such features and platforms in one super tool or a
+  separate tool per platform is required. This increases maintenance cost for
+  link time optimizer significantly, which is not necessary. This approach
+  also requires staying synchronized with linker developments on various
+  platforms, which is not the main focus of the link time optimizer. Finally,
+  this approach increases end user's build time due to the duplication of work
+  done by this separate tool and the linker itself.
+
+Multi-phase communication between ``libLTO`` and linker
+=======================================================
+
+The linker collects information about symbol definitions and uses in various
+link objects which is more accurate than any information collected by other
+tools during typical build cycles. The linker collects this information by
+looking at the definitions and uses of symbols in native .o files and using
+symbol visibility information. The linker also uses user-supplied information,
+such as a list of exported symbols. LLVM optimizer collects control flow
+information, data flow information and knows much more about program structure
+from the optimizer's point of view. Our goal is to take advantage of tight
+integration between the linker and the optimizer by sharing this information
+during various linking phases.
+
+Phase 1 : Read LLVM Bitcode Files
+---------------------------------
+
+The linker first reads all object files in natural order and collects symbol
+information. 
This includes native object files as well as LLVM bitcode files.
+To minimize the cost to the linker in the case that all .o files are native
+object files, the linker only calls ``lto_module_create()`` when a supplied
+object file is found to not be a native object file. If ``lto_module_create()``
+returns that the file is an LLVM bitcode file, the linker then iterates over the
+module using ``lto_module_get_symbol_name()`` and
+``lto_module_get_symbol_attribute()`` to get all symbols defined and referenced.
+This information is added to the linker's global symbol table.
+
+
+The lto* functions are all implemented in a shared object libLTO. This allows
+the LLVM LTO code to be updated independently of the linker tool. On platforms
+that support it, the shared object is lazily loaded.
+
+Phase 2 : Symbol Resolution
+---------------------------
+
+In this stage, the linker resolves symbols using the global symbol table. It
+may report undefined symbol errors, read archive members, replace weak symbols,
+etc. The linker is able to do this seamlessly even though it does not know the
+exact content of input LLVM bitcode files. If dead code stripping is enabled
+then the linker collects the list of live symbols.
+
+Phase 3 : Optimize Bitcode Files
+--------------------------------
+
+After symbol resolution, the linker tells the LTO shared object which symbols
+are needed by native object files. In the example above, the linker reports
+that only ``foo1()`` is used by native object files using
+``lto_codegen_add_must_preserve_symbol()``. Next the linker invokes the LLVM
+optimizer and code generators using ``lto_codegen_compile()`` which returns a
+native object file created by merging the LLVM bitcode files and applying
+various optimization passes. 
+
+Phase 4 : Symbol Resolution after optimization
+----------------------------------------------
+
+In this phase, the linker reads the optimized native object file and updates
+the internal global symbol table to reflect any changes. The linker also
+collects information about any changes in use of external symbols by LLVM
+bitcode files. In the example above, the linker notes that ``foo4()`` is not
+used any more. If dead code stripping is enabled then the linker refreshes the
+live symbol information appropriately and performs dead code stripping.
+
+After this phase, the linker continues linking as if it never saw LLVM bitcode
+files.
+
+.. _libLTO:
+
+``libLTO``
+==========
+
+``libLTO`` is a shared object that is part of the LLVM tools, and is intended
+for use by a linker. ``libLTO`` provides an abstract C interface to use the LLVM
+interprocedural optimizer without exposing details of LLVM's internals. The
+intention is to keep the interface as stable as possible even when the LLVM
+optimizer continues to evolve. It should even be possible for a completely
+different compilation technology to provide a different libLTO that works with
+their object files and the standard linker tool.
+
+``lto_module_t``
+----------------
+
+A non-native object file is handled via an ``lto_module_t``. The following
+functions allow the linker to check if a file (on disk or in a memory buffer) is
+a file which libLTO can process:
+
+.. code-block:: c
+
+  lto_module_is_object_file(const char*)
+  lto_module_is_object_file_for_target(const char*, const char*)
+  lto_module_is_object_file_in_memory(const void*, size_t)
+  lto_module_is_object_file_in_memory_for_target(const void*, size_t, const char*)
+
+If the object file can be processed by ``libLTO``, the linker creates a
+``lto_module_t`` by using one of:
+
+.. code-block:: c
+
+  lto_module_create(const char*)
+  lto_module_create_from_memory(const void*, size_t)
+
+and when done, the handle is released via
+
+.. 
code-block:: c + + lto_module_dispose(lto_module_t) + + +The linker can introspect the non-native object file by getting the number of +symbols and getting the name and attributes of each symbol via: + +.. code-block:: c + + lto_module_get_num_symbols(lto_module_t) + lto_module_get_symbol_name(lto_module_t, unsigned int) + lto_module_get_symbol_attribute(lto_module_t, unsigned int) + +The attributes of a symbol include the alignment, visibility, and kind. + +``lto_code_gen_t`` +------------------ + +Once the linker has loaded each non-native object files into an +``lto_module_t``, it can request ``libLTO`` to process them all and generate a +native object file. This is done in a couple of steps. First, a code generator +is created with: + +.. code-block:: c + + lto_codegen_create() + +Then, each non-native object file is added to the code generator with: + +.. code-block:: c + + lto_codegen_add_module(lto_code_gen_t, lto_module_t) + +The linker then has the option of setting some codegen options. Whether or not +to generate DWARF debug info is set with: + +.. code-block:: c + + lto_codegen_set_debug_model(lto_code_gen_t) + +Which kind of position independence is set with: + +.. code-block:: c + + lto_codegen_set_pic_model(lto_code_gen_t) + +And each symbol that is referenced by a native object file or otherwise must not +be optimized away is set with: + +.. code-block:: c + + lto_codegen_add_must_preserve_symbol(lto_code_gen_t, const char*) + +After all these settings are done, the linker requests that a native object file +be created from the modules with the settings using: + +.. code-block:: c + + lto_codegen_compile(lto_code_gen_t, size*) + +which returns a pointer to a buffer containing the generated native object file. +The linker then parses that and links it with the rest of the native object +files. 
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html deleted file mode 100644 index 961b1e52d1..0000000000 --- a/docs/MakefileGuide.html +++ /dev/null @@ -1,1039 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>LLVM Makefile Guide</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1>LLVM Makefile Guide</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - <li><a href="#general">General Concepts</a> - <ol> - <li><a href="#projects">Projects</a></li> - <li><a href="#varvals">Variable Values</a></li> - <li><a href="#including">Including Makefiles</a> - <ol> - <li><a href="#Makefile">Makefile</a></li> - <li><a href="#Makefile.common">Makefile.common</a></li> - <li><a href="#Makefile.config">Makefile.config</a></li> - <li><a href="#Makefile.rules">Makefile.rules</a></li> - </ol> - </li> - <li><a href="#Comments">Comments</a></li> - </ol> - </li> - <li><a href="#tutorial">Tutorial</a> - <ol> - <li><a href="#libraries">Libraries</a> - <ol> - <li><a href="#BCModules">Bitcode Modules</a></li> - <li><a href="#LoadableModules">Loadable Modules</a></li> - </ol> - </li> - <li><a href="#tools">Tools</a> - <ol> - <li><a href="#JIT">JIT Tools</a></li> - </ol> - </li> - <li><a href="#projects">Projects</a></li> - </ol> - </li> - <li><a href="#targets">Targets Supported</a> - <ol> - <li><a href="#all">all</a></li> - <li><a href="#all-local">all-local</a></li> - <li><a href="#check">check</a></li> - <li><a href="#check-local">check-local</a></li> - <li><a href="#clean">clean</a></li> - <li><a href="#clean-local">clean-local</a></li> - <li><a href="#dist">dist</a></li> - <li><a href="#dist-check">dist-check</a></li> - <li><a href="#dist-clean">dist-clean</a></li> - <li><a href="#install">install</a></li> - <li><a href="#preconditions">preconditions</a></li> - <li><a 
href="#printvars">printvars</a></li> - <li><a href="#reconfigure">reconfigure</a></li> - <li><a href="#spotless">spotless</a></li> - <li><a href="#tags">tags</a></li> - <li><a href="#uninstall">uninstall</a></li> - </ol> - </li> - <li><a href="#variables">Using Variables</a> - <ol> - <li><a href="#setvars">Control Variables</a></li> - <li><a href="#overvars">Override Variables</a></li> - <li><a href="#getvars">Readable Variables</a></li> - <li><a href="#intvars">Internal Variables</a></li> - </ol> - </li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="mailto:reid@x10sys.com">Reid Spencer</a></p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="introduction">Introduction</a></h2> -<!-- *********************************************************************** --> - -<div> - <p>This document provides <em>usage</em> information about the LLVM makefile - system. While loosely patterned after the BSD makefile system, LLVM has taken - a departure from BSD in order to implement additional features needed by LLVM. - Although makefile systems such as automake were attempted at one point, it - has become clear that the features needed by LLVM and the Makefile norm are - too great to use a more limited tool. Consequently, LLVM requires simply GNU - Make 3.79, a widely portable makefile processor. LLVM unabashedly makes heavy - use of the features of GNU Make so the dependency on GNU Make is firm. 
If - you're not familiar with <tt>make</tt>, it is recommended that you read the - <a href="http://www.gnu.org/software/make/manual/make.html">GNU Makefile - Manual</a>.</p> - <p>While this document is rightly part of the - <a href="ProgrammersManual.html">LLVM Programmer's Manual</a>, it is treated - separately here because of the volume of content and because it is often an - early source of bewilderment for new developers.</p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="general">General Concepts</a></h2> -<!-- *********************************************************************** --> - -<div> - <p>The LLVM Makefile System is the component of LLVM that is responsible for - building the software, testing it, generating distributions, checking those - distributions, installing and uninstalling, etc. It consists of a several - files throughout the source tree. These files and other general concepts are - described in this section.</p> - -<!-- ======================================================================= --> -<h3><a name="projects">Projects</a></h3> -<div> - <p>The LLVM Makefile System is quite generous. It not only builds its own - software, but it can build yours too. Built into the system is knowledge of - the <tt>llvm/projects</tt> directory. Any directory under <tt>projects</tt> - that has both a <tt>configure</tt> script and a <tt>Makefile</tt> is assumed - to be a project that uses the LLVM Makefile system. Building software that - uses LLVM does not require the LLVM Makefile System nor even placement in the - <tt>llvm/projects</tt> directory. However, doing so will allow your project - to get up and running quickly by utilizing the built-in features that are used - to compile LLVM. 
LLVM compiles itself using the same features of the makefile - system as used for projects.</p> - <p>For complete details on setting up your projects configuration, simply - mimic the <tt>llvm/projects/sample</tt> project or for further details, - consult the <a href="Projects.html">Projects.html</a> page.</p> -</div> - -<!-- ======================================================================= --> -<h3><a name="varvalues">Variable Values</a></h3> -<div> - <p>To use the makefile system, you simply create a file named - <tt>Makefile</tt> in your directory and declare values for certain variables. - The variables and values that you select determine what the makefile system - will do. These variables enable rules and processing in the makefile system - that automatically Do The Right Thing™. -</div> - -<!-- ======================================================================= --> -<h3><a name="including">Including Makefiles</a></h3> -<div> - <p>Setting variables alone is not enough. You must include into your Makefile - additional files that provide the rules of the LLVM Makefile system. The - various files involved are described in the sections that follow.</p> - -<!-- ======================================================================= --> -<h4><a name="Makefile">Makefile</a></h4> -<div> - <p>Each directory to participate in the build needs to have a file named - <tt>Makefile</tt>. This is the file first read by <tt>make</tt>. It has three - sections:</p> - <ol> - <li><a href="#setvars">Settable Variables</a> - Required that must be set - first.</li> - <li><a href="#Makefile.common">include <tt>$(LEVEL)/Makefile.common</tt></a> - - include the LLVM Makefile system. - <li><a href="#overvars">Override Variables</a> - Override variables set by - the LLVM Makefile system. 
- </ol> -</div> - -<!-- ======================================================================= --> -<h4><a name="Makefile.common">Makefile.common</a></h4> -<div> - <p>Every project must have a <tt>Makefile.common</tt> file at its top source - directory. This file serves three purposes:</p> - <ol> - <li>It includes the project's configuration makefile to obtain values - determined by the <tt>configure</tt> script. This is done by including the - <a href="#Makefile.config"><tt>$(LEVEL)/Makefile.config</tt></a> file.</li> - <li>It specifies any other (static) values that are needed throughout the - project. Only values that are used in all or a large proportion of the - project's directories should be placed here.</li> - <li>It includes the standard rules for the LLVM Makefile system, - <a href="#Makefile.rules"><tt>$(LLVM_SRC_ROOT)/Makefile.rules</tt></a>. - This file is the "guts" of the LLVM Makefile system.</li> - </ol> -</div> - -<!-- ======================================================================= --> -<h4><a name="Makefile.config">Makefile.config</a></h4> -<div> - <p>Every project must have a <tt>Makefile.config</tt> at the top of its - <em>build</em> directory. This file is <b>generated</b> by the - <tt>configure</tt> script from the pattern provided by the - <tt>Makefile.config.in</tt> file located at the top of the project's - <em>source</em> directory. The contents of this file depend largely on what - configuration items the project uses, however most projects can get what they - need by just relying on LLVM's configuration found in - <tt>$(LLVM_OBJ_ROOT)/Makefile.config</tt>. -</div> - -<!-- ======================================================================= --> -<h4><a name="Makefile.rules">Makefile.rules</a></h4> -<div> - <p>This file, located at <tt>$(LLVM_SRC_ROOT)/Makefile.rules</tt> is the heart - of the LLVM Makefile System. It provides all the logic, dependencies, and - rules for building the targets supported by the system. 
What it does largely - depends on the values of <tt>make</tt> <a href="#variables">variables</a> that - have been set <em>before</em> <tt>Makefile.rules</tt> is included. -</div> - -</div> - -<!-- ======================================================================= --> -<h3><a name="Comments">Comments</a></h3> -<div> - <p>User Makefiles need not have comments in them unless the construction is - unusual or it does not strictly follow the rules and patterns of the LLVM - makefile system. Makefile comments are invoked with the pound (#) character. - The # character and any text following it, to the end of the line, are ignored - by <tt>make</tt>.</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="tutorial">Tutorial</a></h2> -<!-- *********************************************************************** --> -<div> - <p>This section provides some examples of the different kinds of modules you - can build with the LLVM makefile system. In general, each directory you - provide will build a single object although that object may be composed of - additionally compiled components.</p> - -<!-- ======================================================================= --> -<h3><a name="libraries">Libraries</a></h3> -<div> - <p>Only a few variable definitions are needed to build a regular library. - Normally, the makefile system will build all the software into a single - <tt>libname.o</tt> (pre-linked) object. This means the library is not - searchable and that the distinction between compilation units has been - dissolved. Optionally, you can ask for a shared library (.so) or archive - library (.a) built. Archive libraries are the default. For example:</p> - <pre><tt> - LIBRARYNAME = mylib - SHARED_LIBRARY = 1 - ARCHIVE_LIBRARY = 1 - </tt></pre> - <p>says to build a library named "mylib" with both a shared library - (<tt>mylib.so</tt>) and an archive library (<tt>mylib.a</tt>) version. 
The
- contents of all the
- libraries produced will be the same, they are just constructed differently.
- Note that you normally do not need to specify the sources involved. The LLVM
- Makefile system will infer the source files from the contents of the source
- directory.</p>
- <p>The <tt>LOADABLE_MODULE=1</tt> directive can be used in conjunction with
- <tt>SHARED_LIBRARY=1</tt> to indicate that the resulting shared library should
- be openable with the <tt>dlopen</tt> function and searchable with the
- <tt>dlsym</tt> function (or your operating system's equivalents). While this
- isn't strictly necessary on Linux and a few other platforms, it is required
- on systems like HP-UX and Darwin. You should use <tt>LOADABLE_MODULE</tt> for
- any shared library that you intend to be loaded into a tool via the
- <tt>-load</tt> option. See the
- <a href="WritingAnLLVMPass.html#makefile">WritingAnLLVMPass.html</a> document
- for an example of why you might want to do this.
-
-<!-- ======================================================================= -->
-<h4><a name="BCModules">Bitcode Modules</a></h4>
-<div>
- <p>In some situations, it is desirable to build a single bitcode module from
- a variety of sources, instead of an archive, shared library, or bitcode
- library. Bitcode modules can be specified in addition to any of the other
- types of libraries by defining the <a href="#MODULE_NAME">MODULE_NAME</a>
- variable. For example:</p>
- <pre><tt>
- LIBRARYNAME = mylib
- BYTECODE_LIBRARY = 1
- MODULE_NAME = mymod
- </tt></pre>
- <p>will build a module named <tt>mymod.bc</tt> from the sources in the
- directory. This module will be an aggregation of all the bitcode modules
- derived from the sources. The example will also build a bitcode archive
- containing a bitcode module for each compiled source file. The difference is
- subtle, but important depending on how the module or library is to be linked. 
- </p> -</div> - -<!-- ======================================================================= --> -<h4> - <a name="LoadableModules">Loadable Modules</a> -</h4> -<div> - <p>In some situations, you need to create a loadable module. Loadable modules - can be loaded into programs like <tt>opt</tt> or <tt>llc</tt> to specify - additional passes to run or targets to support. Loadable modules are also - useful for debugging a pass or providing a pass with another package if that - pass can't be included in LLVM.</p> - <p>LLVM provides complete support for building such a module. All you need to - do is use the LOADABLE_MODULE variable in your Makefile. For example, to - build a loadable module named <tt>MyMod</tt> that uses the LLVM libraries - <tt>LLVMSupport.a</tt> and <tt>LLVMSystem.a</tt>, you would specify:</p> - <pre><tt> - LIBRARYNAME := MyMod - LOADABLE_MODULE := 1 - LINK_COMPONENTS := support system - </tt></pre> - <p>Use of the <tt>LOADABLE_MODULE</tt> facility implies several things:</p> - <ol> - <li>There will be no "lib" prefix on the module. This differentiates it from - a standard shared library of the same name.</li> - <li>The <a href="#SHARED_LIBRARY">SHARED_LIBRARY</a> variable is turned - on.</li> - <li>The <a href="#LINK_LIBS_IN_SHARED">LINK_LIBS_IN_SHARED</a> variable - is turned on.</li> - </ol> - <p>A loadable module is loaded by LLVM via the facilities of libtool's libltdl - library which is part of <tt>lib/System</tt> implementation.</p> -</div> - -</div> - -<!-- ======================================================================= --> -<h3><a name="tools">Tools</a></h3> -<div> - <p>For building executable programs (tools), you must provide the name of the - tool and the names of the libraries you wish to link with the tool. 
For
- example:</p>
- <pre><tt>
- TOOLNAME = mytool
- USEDLIBS = mylib
- LINK_COMPONENTS = support system
- </tt></pre>
- <p>says that we are to build a tool named <tt>mytool</tt> and that it requires
- three libraries: <tt>mylib</tt>, <tt>LLVMSupport.a</tt> and
- <tt>LLVMSystem.a</tt>.</p>
- <p>Note that two different variables are used to indicate which libraries are
- linked: <tt>USEDLIBS</tt> and <tt>LLVMLIBS</tt>. This distinction is necessary
- to support projects. <tt>LLVMLIBS</tt> refers to the LLVM libraries found in
- the LLVM object directory. <tt>USEDLIBS</tt> refers to the libraries built by
- your project. In the case of building LLVM tools, <tt>USEDLIBS</tt> and
- <tt>LLVMLIBS</tt> can be used interchangeably since the "project" is LLVM
- itself and <tt>USEDLIBS</tt> refers to the same place as <tt>LLVMLIBS</tt>.
- </p>
- <p>Also note that there are two different ways of specifying a library: with a
- <tt>.a</tt> suffix and without. Without the suffix, the entry refers to the
- re-linked (.o) file which will include <em>all</em> symbols of the library.
- This is useful, for example, to include all passes from a library of passes.
- If the <tt>.a</tt> suffix is used then the library is linked as a searchable
- library (with the <tt>-l</tt> option). In this case, only the symbols that are
- unresolved <em>at that point</em> will be resolved from the library, if they
- exist. Other (unreferenced) symbols will not be included when the <tt>.a</tt>
- syntax is used. Note that in order to use the <tt>.a</tt> suffix, the library
- in question must have been built with the <tt>ARCHIVE_LIBRARY</tt> option set.
- </p>
-
-<!-- ======================================================================= -->
-<h4><a name="JIT">JIT Tools</a></h4>
-<div>
- <p>Many tools will want to use the JIT features of LLVM. 
To do this, you - simply specify that you want an execution 'engine', and the makefiles will - automatically link in the appropriate JIT for the host or an interpreter - if none is available:</p> - <pre><tt> - TOOLNAME = my_jit_tool - USEDLIBS = mylib - LINK_COMPONENTS = engine - </tt></pre> - <p>Of course, any additional libraries may be listed as other components. To - get a full understanding of how this changes the linker command, it is - recommended that you:</p> - <pre><tt> - cd examples/Fibonacci - make VERBOSE=1 - </tt></pre> -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="targets">Targets Supported</a></h2> -<!-- *********************************************************************** --> - -<div> - <p>This section describes each of the targets that can be built using the LLVM - Makefile system. Any target can be invoked from any directory but not all are - applicable to a given directory (e.g. "check", "dist" and "install" will - always operate as if invoked from the top level directory).</p> - - <table style="text-align:left"> - <tr> - <th>Target Name</th><th>Implied Targets</th><th>Target Description</th> - </tr> - <tr><td><a href="#all"><tt>all</tt></a></td><td></td> - <td>Compile the software recursively. Default target. - </td></tr> - <tr><td><a href="#all-local"><tt>all-local</tt></a></td><td></td> - <td>Compile the software in the local directory only. - </td></tr> - <tr><td><a href="#check"><tt>check</tt></a></td><td></td> - <td>Change to the <tt>test</tt> directory in a project and run the - test suite there. - </td></tr> - <tr><td><a href="#check-local"><tt>check-local</tt></a></td><td></td> - <td>Run a local test suite. Generally this is only defined in the - <tt>Makefile</tt> of the project's <tt>test</tt> directory. - </td></tr> - <tr><td><a href="#clean"><tt>clean</tt></a></td><td></td> - <td>Remove built objects recursively. 
- </td></tr> - <tr><td><a href="#clean-local"><tt>clean-local</tt></a></td><td></td> - <td>Remove built objects from the local directory only. - </td></tr> - <tr><td><a href="#dist"><tt>dist</tt></a></td><td>all</td> - <td>Prepare a source distribution tarball. - </td></tr> - <tr><td><a href="#dist-check"><tt>dist-check</tt></a></td><td>all</td> - <td>Prepare a source distribution tarball and check that it builds. - </td></tr> - <tr><td><a href="#dist-clean"><tt>dist-clean</tt></a></td><td>clean</td> - <td>Clean source distribution tarball temporary files. - </td></tr> - <tr><td><a href="#install"><tt>install</tt></a></td><td>all</td> - <td>Copy built objects to installation directory. - </td></tr> - <tr><td><a href="#preconditions"><tt>preconditions</tt></a></td><td>all</td> - <td>Check to make sure configuration and makefiles are up to date. - </td></tr> - <tr><td><a href="#printvars"><tt>printvars</tt></a></td><td>all</td> - <td>Prints variables defined by the makefile system (for debugging). - </td></tr> - <tr><td><a href="#tags"><tt>tags</tt></a></td><td></td> - <td>Make C and C++ tags files for emacs and vi. - </td></tr> - <tr><td><a href="#uninstall"><tt>uninstall</tt></a></td><td></td> - <td>Remove built objects from installation directory. - </td></tr> - </table> - -<!-- ======================================================================= --> -<h3><a name="all">all (default)</a></h3> -<div> - <p>When you invoke <tt>make</tt> with no arguments, you are implicitly - instructing it to seek the "all" target (goal). This target is used for - building the software recursively and will do different things in different - directories. For example, in a <tt>lib</tt> directory, the "all" target will - compile source files and generate libraries. 
But, in a <tt>tools</tt>
- directory, it will link libraries and generate executables.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="all-local">all-local</a></h3>
-<div>
- <p>This target is the same as <a href="#all">all</a> but it operates only on
- the current directory instead of recursively.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="check">check</a></h3>
-<div>
- <p>This target can be invoked from anywhere within a project's directories
- but always invokes the <a href="#check-local"><tt>check-local</tt></a> target
- in the project's <tt>test</tt> directory, if it exists and has a
- <tt>Makefile</tt>. A warning is produced otherwise. If
- <a href="#TESTSUITE"><tt>TESTSUITE</tt></a> is defined on the <tt>make</tt>
- command line, it will be passed down to the invocation of
- <tt>make check-local</tt> in the <tt>test</tt> directory. The intended usage
- for this is to assist in running specific suites of tests. If
- <tt>TESTSUITE</tt> is not set, the implementation of <tt>check-local</tt>
- should run all normal tests. It is up to the project to define what
- different values for <tt>TESTSUITE</tt> will do. See the
- <a href="TestingGuide.html">TestingGuide</a> for further details.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="check-local">check-local</a></h3>
-<div>
- <p>This target should be implemented by the <tt>Makefile</tt> in the project's
- <tt>test</tt> directory. It is invoked by the <tt>check</tt> target elsewhere.
- Each project is free to define the actions of <tt>check-local</tt> as
- appropriate for that project. The LLVM project itself uses dejagnu to run a
- suite of feature and regression tests. 
Other projects may choose to use
- dejagnu or any other testing mechanism.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="clean">clean</a></h3>
-<div>
- <p>This target cleans the build directory, recursively removing all things
- that the Makefile builds. The cleaning rules have been made guarded so they
- shouldn't go awry (via <tt>rm -f $(UNSET_VARIABLE)/*</tt> which will attempt
- to erase the entire directory structure).</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="clean-local">clean-local</a></h3>
-<div>
- <p>This target does the same thing as <tt>clean</tt> but only for the current
- (local) directory.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="dist">dist</a></h3>
-<div>
- <p>This target builds a distribution tarball. It first builds the entire
- project using the <tt>all</tt> target and then tars up the necessary files and
- compresses it. The generated tarball is sufficient for a casual source
- distribution, but probably not for a release (see <tt>dist-check</tt>).</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="dist-check">dist-check</a></h3>
-<div>
- <p>This target does the same thing as the <tt>dist</tt> target but also checks
- the distribution tarball. The check is made by unpacking the tarball to a new
- directory, configuring it, building it, installing it, and then verifying that
- the installation results are correct (by comparing to the original build). 
- This target can take a long time to run but should be done before a release
- goes out to make sure that the distributed tarball can actually be built into
- a working release.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="dist-clean">dist-clean</a></h3>
-<div>
- <p>This is a special form of the <tt>clean</tt> target. It performs a
- normal <tt>clean</tt> but also removes things pertaining to building the
- distribution.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="install">install</a></h3>
-<div>
- <p>This target finalizes shared objects and executables and copies all
- libraries, headers, executables and documentation to the directory given
- with the <tt>--prefix</tt> option to <tt>configure</tt>. When completed,
- the prefix directory will have everything needed to <b>use</b> LLVM. </p>
- <p>The LLVM makefiles can generate complete <b>internal</b> documentation
- for all the classes by using <tt>doxygen</tt>. By default, this feature is
- <b>not</b> enabled because it takes a long time and generates a massive
- amount of data (>100MB). If you want this feature, you must configure LLVM
- with the --enable-doxygen switch and ensure that a modern version of doxygen
- (1.3.7 or later) is available in your <tt>PATH</tt>. You can download
- doxygen from
- <a href="http://www.stack.nl/~dimitri/doxygen/download.html#latestsrc">
- here</a>.
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="preconditions">preconditions</a></h3>
-<div>
- <p>This utility target checks to see if the <tt>Makefile</tt> in the object
- directory is older than the <tt>Makefile</tt> in the source directory and
- copies it if so. It also reruns the <tt>configure</tt> script if that needs to
- be done and rebuilds the <tt>Makefile.config</tt> file similarly. 
Users may
- overload this target to ensure that sanity checks are run <em>before</em> any
- building of targets as all the targets depend on <tt>preconditions</tt>.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="printvars">printvars</a></h3>
-<div>
- <p>This utility target just causes the LLVM makefiles to print out some of
- the makefile variables so that you can double check how things are set. </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="reconfigure">reconfigure</a></h3>
-<div>
- <p>This utility target will force a reconfigure of LLVM or your project. It
- simply runs <tt>$(PROJ_OBJ_ROOT)/config.status --recheck</tt> to rerun the
- configuration tests and rebuild the configured files. This isn't generally
- useful as the makefiles will reconfigure themselves whenever it's necessary.
- </p>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="spotless">spotless</a></h3>
-<div>
- <p>This utility target, only available when <tt>$(PROJ_OBJ_ROOT)</tt> is not
- the same as <tt>$(PROJ_SRC_ROOT)</tt>, will completely clean the
- <tt>$(PROJ_OBJ_ROOT)</tt> directory by removing its content entirely and
- reconfiguring the directory. This returns the <tt>$(PROJ_OBJ_ROOT)</tt>
- directory to a completely fresh state. All content in the directory except
- configured files and top-level makefiles will be lost.</p>
- <div class="doc_warning"><p>Use with caution.</p></div>
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="tags">tags</a></h3>
-<div>
- <p>This target will generate a <tt>TAGS</tt> file in the top-level source
- directory. It is meant for use with emacs, XEmacs, or ViM. The TAGS file
- provides an index of symbol definitions so that the editor can jump you to the
- definition quickly. 
</p> -</div> - -<!-- ======================================================================= --> -<h3><a name="uninstall">uninstall</a></h3> -<div> - <p>This target is the opposite of the <tt>install</tt> target. It removes the - header, library and executable files from the installation directories. Note - that the directories themselves are not removed because it is not guaranteed - that LLVM is the only thing installing there (e.g. --prefix=/usr).</p> -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="variables">Variables</a></h2> -<!-- *********************************************************************** --> -<div> - <p>Variables are used to tell the LLVM Makefile System what to do and to - obtain information from it. Variables are also used internally by the LLVM - Makefile System. Variable names that contain only the upper case alphabetic - letters and underscore are intended for use by the end user. All other - variables are internal to the LLVM Makefile System and should not be relied - upon nor modified. The sections below describe how to use the LLVM Makefile - variables.</p> - -<!-- ======================================================================= --> -<h3><a name="setvars">Control Variables</a></h3> -<div> - <p>Variables listed in the table below should be set <em>before</em> the - inclusion of <a href="#Makefile.common"><tt>$(LEVEL)/Makefile.common</tt></a>. - These variables provide input to the LLVM make system that tell it what to do - for the current directory.</p> - <dl> - <dt><a name="BUILD_ARCHIVE"><tt>BUILD_ARCHIVE</tt></a></dt> - <dd>If set to any value, causes an archive (.a) library to be built.</dd> - <dt><a name="BUILT_SOURCES"><tt>BUILT_SOURCES</tt></a></dt> - <dd>Specifies a set of source files that are generated from other source - files. 
These sources will be built before any other target processing to - ensure they are present.</dd> - <dt><a name="BYTECODE_LIBRARY"><tt>BYTECODE_LIBRARY</tt></a></dt> - <dd>If set to any value, causes a bitcode library (.bc) to be built.</dd> - <dt><a name="CONFIG_FILES"><tt>CONFIG_FILES</tt></a></dt> - <dd>Specifies a set of configuration files to be installed.</dd> - <dt><a name="DEBUG_SYMBOLS"><tt>DEBUG_SYMBOLS</tt></a></dt> - <dd>If set to any value, causes the build to include debugging - symbols even in optimized objects, libraries and executables. This - alters the flags specified to the compilers and linkers. Debugging - isn't fun in an optimized build, but it is possible.</dd> - <dt><a name="DIRS"><tt>DIRS</tt></a></dt> - <dd>Specifies a set of directories, usually children of the current - directory, that should also be made using the same goal. These directories - will be built serially.</dd> - <dt><a name="DISABLE_AUTO_DEPENDENCIES"><tt>DISABLE_AUTO_DEPENDENCIES</tt></a></dt> - <dd>If set to any value, causes the makefiles to <b>not</b> automatically - generate dependencies when running the compiler. Use of this feature is - discouraged and it may be removed at a later date.</dd> - <dt><a name="ENABLE_OPTIMIZED"><tt>ENABLE_OPTIMIZED</tt></a></dt> - <dd>If set to 1, causes the build to generate optimized objects, - libraries and executables. This alters the flags specified to the compilers - and linkers. Generally debugging won't be a fun experience with an optimized - build.</dd> - <dt><a name="ENABLE_PROFILING"><tt>ENABLE_PROFILING</tt></a></dt> - <dd>If set to 1, causes the build to generate both optimized and - profiled objects, libraries and executables. This alters the flags specified - to the compilers and linkers to ensure that profile data can be collected - from the tools built. 
Use the <tt>gprof</tt> tool to analyze the output from - the profiled tools (<tt>gmon.out</tt>).</dd> - <dt><a name="DISABLE_ASSERTIONS"><tt>DISABLE_ASSERTIONS</tt></a></dt> - <dd>If set to 1, causes the build to disable assertions, even if - building a debug or profile build. This will exclude all assertion check - code from the build. LLVM will execute faster, but with little help when - things go wrong.</dd> - <dt><a name="EXPERIMENTAL_DIRS"><tt>EXPERIMENTAL_DIRS</tt></a></dt> - <dd>Specify a set of directories that should be built, but if they fail, it - should not cause the build to fail. Note that this should only be used - temporarily while code is being written.</dd> - <dt><a name="EXPORTED_SYMBOL_FILE"><tt>EXPORTED_SYMBOL_FILE</tt></a></dt> - <dd>Specifies the name of a single file that contains a list of the - symbols to be exported by the linker. One symbol per line.</dd> - <dt><a name="EXPORTED_SYMBOL_LIST"><tt>EXPORTED_SYMBOL_LIST</tt></a></dt> - <dd>Specifies a set of symbols to be exported by the linker.</dd> - <dt><a name="EXTRA_DIST"><tt>EXTRA_DIST</tt></a></dt> - <dd>Specifies additional files that should be distributed with LLVM. All - source files, all built sources, all Makefiles, and most documentation files - will be automatically distributed. Use this variable to distribute any - files that are not automatically distributed.</dd> - <dt><a name="KEEP_SYMBOLS"><tt>KEEP_SYMBOLS</tt></a></dt> - <dd>If set to any value, specifies that when linking executables the - makefiles should retain debug symbols in the executable. Normally, symbols - are stripped from the executable.</dd> - <dt><a name="LEVEL"><tt>LEVEL</tt></a><small>(required)</small></dt> - <dd>Specify the level of nesting from the top level. This variable must be - set in each makefile as it is used to find the top level and thus the other - makefiles.</dd> - <dt><a name="LIBRARYNAME"><tt>LIBRARYNAME</tt></a></dt> - <dd>Specify the name of the library to be built. 
(Required For - Libraries)</dd> - <dt><a name="LINK_COMPONENTS"><tt>LINK_COMPONENTS</tt></a></dt> - <dd>When specified for building a tool, the value of this variable will be - passed to the <tt>llvm-config</tt> tool to generate a link line for the - tool. Unlike <tt>USEDLIBS</tt> and <tt>LLVMLIBS</tt>, not all libraries need - to be specified. The <tt>llvm-config</tt> tool will figure out the library - dependencies and add any libraries that are needed. The <tt>USEDLIBS</tt> - variable can still be used in conjunction with <tt>LINK_COMPONENTS</tt> so - that additional project-specific libraries can be linked with the LLVM - libraries specified by <tt>LINK_COMPONENTS</tt></dd> - <dt><a name="LINK_LIBS_IN_SHARED"><tt>LINK_LIBS_IN_SHARED</tt></a></dt> - <dd>By default, shared library linking will ignore any libraries specified - with the <a href="LLVMLIBS">LLVMLIBS</a> or <a href="USEDLIBS">USEDLIBS</a>. - This prevents shared libs from including things that will be in the LLVM - tool the shared library will be loaded into. However, sometimes it is useful - to link certain libraries into your shared library and this option enables - that feature.</dd> - <dt><a name="LLVMLIBS"><tt>LLVMLIBS</tt></a></dt> - <dd>Specifies the set of libraries from the LLVM $(ObjDir) that will be - linked into the tool or library.</dd> - <dt><a name="LOADABLE_MODULE"><tt>LOADABLE_MODULE</tt></a></dt> - <dd>If set to any value, causes the shared library being built to also be - a loadable module. Loadable modules can be opened with the dlopen() function - and searched with dlsym (or the operating system's equivalent). Note that - setting this variable without also setting <tt>SHARED_LIBRARY</tt> will have - no effect.</dd> - <dt><a name="MODULE_NAME"><tt>MODULE_NAME</tt></a></dt> - <dd>Specifies the name of a bitcode module to be created. A bitcode - module can be specified in conjunction with other kinds of library builds - or by itself. 
It constructs from the sources a single linked bitcode - file.</dd> - <dt><a name="NO_INSTALL"><tt>NO_INSTALL</tt></a></dt> - <dd>Specifies that the build products of the directory should not be - installed but should be built even if the <tt>install</tt> target is given. - This is handy for directories that build libraries or tools that are only - used as part of the build process, such as code generators (e.g. - <tt>tblgen</tt>).</dd> - <dt><a name="OPTIONAL_DIRS"><tt>OPTIONAL_DIRS</tt></a></dt> - <dd>Specify a set of directories that may be built, if they exist, but its - not an error for them not to exist.</dd> - <dt><a name="PARALLEL_DIRS"><tt>PARALLEL_DIRS</tt></a></dt> - <dd>Specify a set of directories to build recursively and in parallel if - the -j option was used with <tt>make</tt>.</dd> - <dt><a name="SHARED_LIBRARY"><tt>SHARED_LIBRARY</tt></a></dt> - <dd>If set to any value, causes a shared library (.so) to be built in - addition to any other kinds of libraries. Note that this option will cause - all source files to be built twice: once with options for position - independent code and once without. Use it only where you really need a - shared library.</dd> - <dt><a name="SOURCES"><tt>SOURCES</tt><small>(optional)</small></a></dt> - <dd>Specifies the list of source files in the current directory to be - built. Source files of any type may be specified (programs, documentation, - config files, etc.). If not specified, the makefile system will infer the - set of source files from the files present in the current directory.</dd> - <dt><a name="SUFFIXES"><tt>SUFFIXES</tt></a></dt> - <dd>Specifies a set of filename suffixes that occur in suffix match rules. - Only set this if your local <tt>Makefile</tt> specifies additional suffix - match rules.</dd> - <dt><a name="TARGET"><tt>TARGET</tt></a></dt> - <dd>Specifies the name of the LLVM code generation target that the - current directory builds. 
Setting this variable enables additional rules to - build <tt>.inc</tt> files from <tt>.td</tt> files. </dd> - <dt><a name="TESTSUITE"><tt>TESTSUITE</tt></a></dt> - <dd>Specifies the directory of tests to run in <tt>llvm/test</tt>.</dd> - <dt><a name="TOOLNAME"><tt>TOOLNAME</tt></a></dt> - <dd>Specifies the name of the tool that the current directory should - build.</dd> - <dt><a name="TOOL_VERBOSE"><tt>TOOL_VERBOSE</tt></a></dt> - <dd>Implies VERBOSE and also tells each tool invoked to be verbose. This is - handy when you're trying to see the sub-tools invoked by each tool invoked - by the makefile. For example, this will pass <tt>-v</tt> to the GCC - compilers which causes it to print out the command lines it uses to invoke - sub-tools (compiler, assembler, linker).</dd> - <dt><a name="USEDLIBS"><tt>USEDLIBS</tt></a></dt> - <dd>Specifies the list of project libraries that will be linked into the - tool or library.</dd> - <dt><a name="VERBOSE"><tt>VERBOSE</tt></a></dt> - <dd>Tells the Makefile system to produce detailed output of what it is doing - instead of just summary comments. This will generate a LOT of output.</dd> - </dl> -</div> - -<!-- ======================================================================= --> -<h3><a name="overvars">Override Variables</a></h3> -<div> - <p>Override variables can be used to override the default - values provided by the LLVM makefile system. These variables can be set in - several ways:</p> - <ul> - <li>In the environment (e.g. 
setenv, export) -- not recommended.</li> - <li>On the <tt>make</tt> command line -- recommended.</li> - <li>On the <tt>configure</tt> command line</li> - <li>In the Makefile (only <em>after</em> the inclusion of <a - href="#Makefile.common"><tt>$(LEVEL)/Makefile.common</tt></a>).</li> - </ul> - <p>The override variables are given below:</p> - <dl> - <dt><a name="AR"><tt>AR</tt></a> <small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>ar</tt> tool.</dd> - <dt><a name="PROJ_OBJ_DIR"><tt>PROJ_OBJ_DIR</tt></a></dt> - <dd>The directory into which the products of build rules will be placed. - This might be the same as - <a href="#PROJ_SRC_DIR"><tt>PROJ_SRC_DIR</tt></a> but typically is - not.</dd> - <dt><a name="PROJ_SRC_DIR"><tt>PROJ_SRC_DIR</tt></a></dt> - <dd>The directory which contains the source files to be built.</dd> - <dt><a name="BUILD_EXAMPLES"><tt>BUILD_EXAMPLES</tt></a></dt> - <dd>If set to 1, build examples in <tt>examples</tt> and (if building - Clang) <tt>tools/clang/examples</tt> directories.</dd> - <dt><a name="BZIP2"><tt>BZIP2</tt></a><small>(configured)</small></dt> - <dd>The path to the <tt>bzip2</tt> tool.</dd> - <dt><a name="CC"><tt>CC</tt></a><small>(configured)</small></dt> - <dd>The path to the 'C' compiler.</dd> - <dt><a name="CFLAGS"><tt>CFLAGS</tt></a></dt> - <dd>Additional flags to be passed to the 'C' compiler.</dd> - <dt><a name="CXX"><tt>CXX</tt></a></dt> - <dd>Specifies the path to the C++ compiler.</dd> - <dt><a name="CXXFLAGS"><tt>CXXFLAGS</tt></a></dt> - <dd>Additional flags to be passed to the C++ compiler.</dd> - <dt><a name="DATE"><tt>DATE<small>(configured)</small></tt></a></dt> - <dd>Specifies the path to the <tt>date</tt> program or any program that can - generate the current date and time on its standard output</dd> - <dt><a name="DOT"><tt>DOT</tt></a><small>(configured)</small></dt> - <dd>Specifies the path to the <tt>dot</tt> tool or <tt>false</tt> if there - isn't one.</dd> - <dt><a 
name="ECHO"><tt>ECHO</tt></a><small>(configured)</small></dt> - <dd>Specifies the path to the <tt>echo</tt> tool for printing output.</dd> - <dt><a name="EXEEXT"><tt>EXEEXT</tt></a><small>(configured)</small></dt> - <dd>Provides the extension to be used on executables built by the makefiles. - The value may be empty on platforms that do not use file extensions for - executables (e.g. Unix).</dd> - <dt><a name="INSTALL"><tt>INSTALL</tt></a><small>(configured)</small></dt> - <dd>Specifies the path to the <tt>install</tt> tool.</dd> - <dt><a name="LDFLAGS"><tt>LDFLAGS</tt></a><small>(configured)</small></dt> - <dd>Allows users to specify additional flags to pass to the linker.</dd> - <dt><a name="LIBS"><tt>LIBS</tt></a><small>(configured)</small></dt> - <dd>The list of libraries that should be linked with each tool.</dd> - <dt><a name="LIBTOOL"><tt>LIBTOOL</tt></a><small>(configured)</small></dt> - <dd>Specifies the path to the <tt>libtool</tt> tool. This tool is renamed - <tt>mklib</tt> by the <tt>configure</tt> script and always located in the - <dt><a name="LLVMAS"><tt>LLVMAS</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>llvm-as</tt> tool.</dd> - <dt><a name="LLVMCC"><tt>LLVMCC</tt></a></dt> - <dd>Specifies the path to the LLVM capable compiler.</dd> - <dt><a name="LLVMCXX"><tt>LLVMCXX</tt></a></dt> - <dd>Specifies the path to the LLVM C++ capable compiler.</dd> - <dt><a name="LLVMGCC"><tt>LLVMGCC</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the LLVM version of the GCC 'C' Compiler</dd> - <dt><a name="LLVMGXX"><tt>LLVMGXX</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the LLVM version of the GCC C++ Compiler</dd> - <dt><a name="LLVMLD"><tt>LLVMLD</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the LLVM bitcode linker tool</dd> - <dt><a name="LLVM_OBJ_ROOT"><tt>LLVM_OBJ_ROOT</tt></a><small>(configured) - </small></dt> - <dd>Specifies the top directory into which the output 
of the build is - placed.</dd> - <dt><a name="LLVM_SRC_ROOT"><tt>LLVM_SRC_ROOT</tt></a><small>(configured) - </small></dt> - <dd>Specifies the top directory in which the sources are found.</dd> - <dt><a name="LLVM_TARBALL_NAME"><tt>LLVM_TARBALL_NAME</tt></a> - <small>(configured)</small></dt> - <dd>Specifies the name of the distribution tarball to create. This is - configured from the name of the project and its version number.</dd> - <dt><a name="MKDIR"><tt>MKDIR</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>mkdir</tt> tool that creates - directories.</dd> - <dt><a name="ONLY_TOOLS"><tt>ONLY_TOOLS</tt></a></dt> - <dd>If set, specifies the list of tools to build.</dd> - <dt><a name="PLATFORMSTRIPOPTS"><tt>PLATFORMSTRIPOPTS</tt></a></dt> - <dd>The options to provide to the linker to specify that a stripped (no - symbols) executable should be built.</dd> - <dt><a name="RANLIB"><tt>RANLIB</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>ranlib</tt> tool.</dd> - <dt><a name="RM"><tt>RM</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>rm</tt> tool.</dd> - <dt><a name="SED"><tt>SED</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>sed</tt> tool.</dd> - <dt><a name="SHLIBEXT"><tt>SHLIBEXT</tt></a><small>(configured)</small></dt> - <dd>Provides the filename extension to use for shared libraries.</dd> - <dt><a name="TBLGEN"><tt>TBLGEN</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>tblgen</tt> tool.</dd> - <dt><a name="TAR"><tt>TAR</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>tar</tt> tool.</dd> - <dt><a name="ZIP"><tt>ZIP</tt></a><small>(defaulted)</small></dt> - <dd>Specifies the path to the <tt>zip</tt> tool.</dd> - </dl> -</div> - -<!-- ======================================================================= --> -<h3><a name="getvars">Readable Variables</a></h3> -<div> - <p>Variables listed in the table 
below can be used by the user's Makefile but - should not be changed. Changing the value will generally cause the build to go - wrong, so don't do it.</p> - <dl> - <dt><a name="bindir"><tt>bindir</tt></a></dt> - <dd>The directory into which executables will ultimately be installed. This - value is derived from the <tt>--prefix</tt> option given to - <tt>configure</tt>.</dd> - <dt><a name="BuildMode"><tt>BuildMode</tt></a></dt> - <dd>The name of the type of build being performed: Debug, Release, or - Profile</dd> - <dt><a name="bitcode_libdir"><tt>bytecode_libdir</tt></a></dt> - <dd>The directory into which bitcode libraries will ultimately be - installed. This value is derived from the <tt>--prefix</tt> option given to - <tt>configure</tt>.</dd> - <dt><a name="ConfigureScriptFLAGS"><tt>ConfigureScriptFLAGS</tt></a></dt> - <dd>Additional flags given to the <tt>configure</tt> script when - reconfiguring.</dd> - <dt><a name="DistDir"><tt>DistDir</tt></a></dt> - <dd>The <em>current</em> directory for which a distribution copy is being - made.</dd> - <dt><a name="Echo"><tt>Echo</tt></a></dt> - <dd>The LLVM Makefile System output command. This provides the - <tt>llvm[n]</tt> prefix and starts with @ so the command itself is not - printed by <tt>make</tt>.</dd> - <dt><a name="EchoCmd"><tt>EchoCmd</tt></a></dt> - <dd> Same as <a href="#Echo"><tt>Echo</tt></a> but without the leading @. - </dd> - <dt><a name="includedir"><tt>includedir</tt></a></dt> - <dd>The directory into which include files will ultimately be installed. - This value is derived from the <tt>--prefix</tt> option given to - <tt>configure</tt>.</dd> - <dt><a name="libdir"><tt>libdir</tt></a></dt><dd></dd> - <dd>The directory into which native libraries will ultimately be installed. 
- This value is derived from the <tt>--prefix</tt> option given to - <tt>configure</tt>.</dd> - <dt><a name="LibDir"><tt>LibDir</tt></a></dt> - <dd>The configuration specific directory into which libraries are placed - before installation.</dd> - <dt><a name="MakefileConfig"><tt>MakefileConfig</tt></a></dt> - <dd>Full path of the <tt>Makefile.config</tt> file.</dd> - <dt><a name="MakefileConfigIn"><tt>MakefileConfigIn</tt></a></dt> - <dd>Full path of the <tt>Makefile.config.in</tt> file.</dd> - <dt><a name="ObjDir"><tt>ObjDir</tt></a></dt> - <dd>The configuration and directory specific directory where build objects - (compilation results) are placed.</dd> - <dt><a name="SubDirs"><tt>SubDirs</tt></a></dt> - <dd>The complete list of sub-directories of the current directory as - specified by other variables.</dd> - <dt><a name="Sources"><tt>Sources</tt></a></dt> - <dd>The complete list of source files.</dd> - <dt><a name="sysconfdir"><tt>sysconfdir</tt></a></dt> - <dd>The directory into which configuration files will ultimately be - installed. This value is derived from the <tt>--prefix</tt> option given to - <tt>configure</tt>.</dd> - <dt><a name="ToolDir"><tt>ToolDir</tt></a></dt> - <dd>The configuration specific directory into which executables are placed - before they are installed.</dd> - <dt><a name="TopDistDir"><tt>TopDistDir</tt></a></dt> - <dd>The top most directory into which the distribution files are copied. - </dd> - <dt><a name="Verb"><tt>Verb</tt></a></dt> - <dd>Use this as the first thing on your build script lines to enable or - disable verbose mode. It expands to either an @ (quiet mode) or nothing - (verbose mode). </dd> - </dl> -</div> - -<!-- ======================================================================= --> -<h3><a name="intvars">Internal Variables</a></h3> -<div> - <p>Variables listed below are used by the LLVM Makefile System - and considered internal. 
You should not use these variables under any - circumstances.</p> - <p><tt> - Archive - AR.Flags - BaseNameSources - BCCompile.C - BCCompile.CXX - BCLinkLib - C.Flags - Compile.C - CompileCommonOpts - Compile.CXX - ConfigStatusScript - ConfigureScript - CPP.Flags - CPP.Flags - CXX.Flags - DependFiles - DestArchiveLib - DestBitcodeLib - DestModule - DestSharedLib - DestTool - DistAlways - DistCheckDir - DistCheckTop - DistFiles - DistName - DistOther - DistSources - DistSubDirs - DistTarBZ2 - DistTarGZip - DistZip - ExtraLibs - FakeSources - INCFiles - InternalTargets - LD.Flags - LibName.A - LibName.BC - LibName.LA - LibName.O - LibTool.Flags - Link - LinkModule - LLVMLibDir - LLVMLibsOptions - LLVMLibsPaths - LLVMToolDir - LLVMUsedLibs - LocalTargets - Module - ObjectsBC - ObjectsLO - ObjectsO - ObjMakefiles - ParallelTargets - PreConditions - ProjLibsOptions - ProjLibsPaths - ProjUsedLibs - Ranlib - RecursiveTargets - SrcMakefiles - Strip - StripWarnMsg - TableGen - TDFiles - ToolBuildPath - TopLevelTargets - UserTargets - </tt></p> -</div> - -</div> - -<!-- *********************************************************************** --> -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/MakefileGuide.rst b/docs/MakefileGuide.rst new file mode 100644 index 0000000000..d2bdd24a9e --- /dev/null +++ b/docs/MakefileGuide.rst @@ -0,0 +1,956 @@ +.. _makefile_guide: + +=================== +LLVM Makefile Guide +=================== + +.. 
contents:: + :local: + +Introduction +============ + +This document provides *usage* information about the LLVM makefile system. While +loosely patterned after the BSD makefile system, LLVM has taken a departure from +BSD in order to implement additional features needed by LLVM. Although makefile +systems, such as ``automake``, were attempted at one point, it has become clear +that the features needed by LLVM and the ``Makefile`` norm are too great to use +a more limited tool. Consequently, LLVM requires simply GNU Make 3.79, a widely +portable makefile processor. LLVM unabashedly makes heavy use of the features of +GNU Make so the dependency on GNU Make is firm. If you're not familiar with +``make``, it is recommended that you read the `GNU Makefile Manual +<http://www.gnu.org/software/make/manual/make.html>`_. + +While this document is rightly part of the `LLVM Programmer's +Manual <ProgrammersManual.html>`_, it is treated separately here because of the +volume of content and because it is often an early source of bewilderment for +new developers. + +General Concepts +================ + +The LLVM Makefile System is the component of LLVM that is responsible for +building the software, testing it, generating distributions, checking those +distributions, installing and uninstalling, etc. It consists of a several files +throughout the source tree. These files and other general concepts are described +in this section. + +Projects +-------- + +The LLVM Makefile System is quite generous. It not only builds its own software, +but it can build yours too. Built into the system is knowledge of the +``llvm/projects`` directory. Any directory under ``projects`` that has both a +``configure`` script and a ``Makefile`` is assumed to be a project that uses the +LLVM Makefile system. Building software that uses LLVM does not require the +LLVM Makefile System nor even placement in the ``llvm/projects`` +directory. 
However, doing so will allow your project to get up and running +quickly by utilizing the built-in features that are used to compile LLVM. LLVM +compiles itself using the same features of the makefile system as used for +projects. + +For complete details on setting up your projects configuration, simply mimic the +``llvm/projects/sample`` project. Or for further details, consult the +`Projects <Projects.html>`_ page. + +Variable Values +--------------- + +To use the makefile system, you simply create a file named ``Makefile`` in your +directory and declare values for certain variables. The variables and values +that you select determine what the makefile system will do. These variables +enable rules and processing in the makefile system that automatically Do The +Right Thing™. + +Including Makefiles +------------------- + +Setting variables alone is not enough. You must include into your Makefile +additional files that provide the rules of the LLVM Makefile system. The various +files involved are described in the sections that follow. + +``Makefile`` +^^^^^^^^^^^^ + +Each directory to participate in the build needs to have a file named +``Makefile``. This is the file first read by ``make``. It has three +sections: + +#. Settable Variables --- Required that must be set first. +#. ``include $(LEVEL)/Makefile.common`` --- include the LLVM Makefile system. +#. Override Variables --- Override variables set by the LLVM Makefile system. + +.. _$(LEVEL)/Makefile.common: + +``Makefile.common`` +^^^^^^^^^^^^^^^^^^^ + +Every project must have a ``Makefile.common`` file at its top source +directory. This file serves three purposes: + +#. It includes the project's configuration makefile to obtain values determined + by the ``configure`` script. This is done by including the + `$(LEVEL)/Makefile.config`_ file. + +#. It specifies any other (static) values that are needed throughout the + project. 
Only values that are used in all or a large proportion of the + project's directories should be placed here. + +#. It includes the standard rules for the LLVM Makefile system, + `$(LLVM_SRC_ROOT)/Makefile.rules`_. This file is the *guts* of the LLVM + ``Makefile`` system. + +.. _$(LEVEL)/Makefile.config: + +``Makefile.config`` +^^^^^^^^^^^^^^^^^^^ + +Every project must have a ``Makefile.config`` at the top of its *build* +directory. This file is **generated** by the ``configure`` script from the +pattern provided by the ``Makefile.config.in`` file located at the top of the +project's *source* directory. The contents of this file depend largely on what +configuration items the project uses, however most projects can get what they +need by just relying on LLVM's configuration found in +``$(LLVM_OBJ_ROOT)/Makefile.config``. + +.. _$(LLVM_SRC_ROOT)/Makefile.rules: + +``Makefile.rules`` +^^^^^^^^^^^^^^^^^^ + +This file, located at ``$(LLVM_SRC_ROOT)/Makefile.rules`` is the heart of the +LLVM Makefile System. It provides all the logic, dependencies, and rules for +building the targets supported by the system. What it does largely depends on +the values of ``make`` `variables`_ that have been set *before* +``Makefile.rules`` is included. + +Comments +^^^^^^^^ + +User ``Makefile``\s need not have comments in them unless the construction is +unusual or it does not strictly follow the rules and patterns of the LLVM +makefile system. Makefile comments are invoked with the pound (``#``) character. +The ``#`` character and any text following it, to the end of the line, are +ignored by ``make``. + +Tutorial +======== + +This section provides some examples of the different kinds of modules you can +build with the LLVM makefile system. In general, each directory you provide will +build a single object although that object may be composed of additionally +compiled components. + +Libraries +--------- + +Only a few variable definitions are needed to build a regular library. 
+Normally, the makefile system will build all the software into a single +``libname.o`` (pre-linked) object. This means the library is not searchable and +that the distinction between compilation units has been dissolved. Optionally, +you can ask for a shared library (.so) or archive library (.a) built. Archive +libraries are the default. For example: + +.. code-block:: makefile + + LIBRARYNAME = mylib + SHARED_LIBRARY = 1 + ARCHIVE_LIBRARY = 1 + +says to build a library named ``mylib`` with both a shared library +(``mylib.so``) and an archive library (``mylib.a``) version. The contents of all +the libraries produced will be the same, they are just constructed differently. +Note that you normally do not need to specify the sources involved. The LLVM +Makefile system will infer the source files from the contents of the source +directory. + +The ``LOADABLE_MODULE=1`` directive can be used in conjunction with +``SHARED_LIBRARY=1`` to indicate that the resulting shared library should be +openable with the ``dlopen`` function and searchable with the ``dlsym`` function +(or your operating system's equivalents). While this isn't strictly necessary on +Linux and a few other platforms, it is required on systems like HP-UX and +Darwin. You should use ``LOADABLE_MODULE`` for any shared library that you +intend to be loaded into a tool via the ``-load`` option. See the +`WritingAnLLVMPass.html <WritingAnLLVMPass.html#makefile>`_ document for an +example of why you might want to do this. + +Bitcode Modules +^^^^^^^^^^^^^^^ + +In some situations, it is desirable to build a single bitcode module from a +variety of sources, instead of an archive, shared library, or bitcode +library. Bitcode modules can be specified in addition to any of the other types +of libraries by defining the `MODULE_NAME`_ variable. For example: + +.. 
code-block:: makefile + + LIBRARYNAME = mylib + BYTECODE_LIBRARY = 1 + MODULE_NAME = mymod + +will build a module named ``mymod.bc`` from the sources in the directory. This +module will be an aggregation of all the bitcode modules derived from the +sources. The example will also build a bitcode archive containing a bitcode +module for each compiled source file. The difference is subtle, but important +depending on how the module or library is to be linked. + +Loadable Modules +^^^^^^^^^^^^^^^^ + +In some situations, you need to create a loadable module. Loadable modules can +be loaded into programs like ``opt`` or ``llc`` to specify additional passes to +run or targets to support. Loadable modules are also useful for debugging a +pass or providing a pass with another package if that pass can't be included in +LLVM. + +LLVM provides complete support for building such a module. All you need to do is +use the ``LOADABLE_MODULE`` variable in your ``Makefile``. For example, to build +a loadable module named ``MyMod`` that uses the LLVM libraries ``LLVMSupport.a`` +and ``LLVMSystem.a``, you would specify: + +.. code-block:: makefile + + LIBRARYNAME := MyMod + LOADABLE_MODULE := 1 + LINK_COMPONENTS := support system + +Use of the ``LOADABLE_MODULE`` facility implies several things: + +#. There will be no "``lib``" prefix on the module. This differentiates it from + a standard shared library of the same name. + +#. The `SHARED_LIBRARY`_ variable is turned on. + +#. The `LINK_LIBS_IN_SHARED`_ variable is turned on. + +A loadable module is loaded by LLVM via the facilities of libtool's libltdl +library which is part of ``lib/System`` implementation. + +Tools +----- + +For building executable programs (tools), you must provide the name of the tool +and the names of the libraries you wish to link with the tool. For example: + +.. 
code-block:: makefile + + TOOLNAME = mytool + USEDLIBS = mylib + LINK_COMPONENTS = support system + +says that we are to build a tool named ``mytool`` and that it requires three +libraries: ``mylib``, ``LLVMSupport.a`` and ``LLVMSystem.a``. + +Note that two different variables are used to indicate which libraries are +linked: ``USEDLIBS`` and ``LLVMLIBS``. This distinction is necessary to support +projects. ``LLVMLIBS`` refers to the LLVM libraries found in the LLVM object +directory. ``USEDLIBS`` refers to the libraries built by your project. In the +case of building LLVM tools, ``USEDLIBS`` and ``LLVMLIBS`` can be used +interchangeably since the "project" is LLVM itself and ``USEDLIBS`` refers to +the same place as ``LLVMLIBS``. + +Also note that there are two different ways of specifying a library: with a +``.a`` suffix and without. Without the suffix, the entry refers to the re-linked +(.o) file which will include *all* symbols of the library. This is +useful, for example, to include all passes from a library of passes. If the +``.a`` suffix is used then the library is linked as a searchable library (with +the ``-l`` option). In this case, only the symbols that are unresolved *at +that point* will be resolved from the library, if they exist. Other +(unreferenced) symbols will not be included when the ``.a`` syntax is used. Note +that in order to use the ``.a`` suffix, the library in question must have been +built with the ``ARCHIVE_LIBRARY`` option set. + +JIT Tools +^^^^^^^^^ + +Many tools will want to use the JIT features of LLVM. To do this, you simply +specify that you want an execution 'engine', and the makefiles will +automatically link in the appropriate JIT for the host or an interpreter if none +is available: + +.. code-block:: makefile + + TOOLNAME = my_jit_tool + USEDLIBS = mylib + LINK_COMPONENTS = engine + +Of course, any additional libraries may be listed as other components. 
To get a +full understanding of how this changes the linker command, it is recommended +that you: + +.. code-block:: bash + + % cd examples/Fibonacci + % make VERBOSE=1 + +Targets Supported +================= + +This section describes each of the targets that can be built using the LLVM +Makefile system. Any target can be invoked from any directory but not all are +applicable to a given directory (e.g. "check", "dist" and "install" will always +operate as if invoked from the top level directory). + +================= =============== ================== +Target Name Implied Targets Target Description +================= =============== ================== +``all`` \ Compile the software recursively. Default target. +``all-local`` \ Compile the software in the local directory only. +``check`` \ Change to the ``test`` directory in a project and run the test suite there. +``check-local`` \ Run a local test suite. Generally this is only defined in the ``Makefile`` of the project's ``test`` directory. +``clean`` \ Remove built objects recursively. +``clean-local`` \ Remove built objects from the local directory only. +``dist`` ``all`` Prepare a source distribution tarball. +``dist-check`` ``all`` Prepare a source distribution tarball and check that it builds. +``dist-clean`` ``clean`` Clean source distribution tarball temporary files. +``install`` ``all`` Copy built objects to installation directory. +``preconditions`` ``all`` Check to make sure configuration and makefiles are up to date. +``printvars`` ``all`` Prints variables defined by the makefile system (for debugging). +``tags`` \ Make C and C++ tags files for emacs and vi. +``uninstall`` \ Remove built objects from installation directory. +================= =============== ================== + +.. _all: + +``all`` (default) +----------------- + +When you invoke ``make`` with no arguments, you are implicitly instructing it to +seek the ``all`` target (goal). 
This target is used for building the software +recursively and will do different things in different directories. For example, +in a ``lib`` directory, the ``all`` target will compile source files and +generate libraries. But, in a ``tools`` directory, it will link libraries and +generate executables. + +``all-local`` +------------- + +This target is the same as `all`_ but it operates only on the current directory +instead of recursively. + +``check`` +--------- + +This target can be invoked from anywhere within a project's directories but +always invokes the `check-local`_ target in the project's ``test`` directory, if +it exists and has a ``Makefile``. A warning is produced otherwise. If +`TESTSUITE`_ is defined on the ``make`` command line, it will be passed down to +the invocation of ``make check-local`` in the ``test`` directory. The intended +usage for this is to assist in running specific suites of tests. If +``TESTSUITE`` is not set, the implementation of ``check-local`` should run all +normal tests. It is up to the project to define what different values for +``TESTSUITE`` will do. See the `Testing Guide <TestingGuide.html>`_ for further +details. + +``check-local`` +--------------- + +This target should be implemented by the ``Makefile`` in the project's ``test`` +directory. It is invoked by the ``check`` target elsewhere. Each project is +free to define the actions of ``check-local`` as appropriate for that +project. The LLVM project itself uses dejagnu to run a suite of feature and +regression tests. Other projects may choose to use dejagnu or any other testing +mechanism. + +``clean`` +--------- + +This target cleans the build directory, recursively removing all things that the +Makefile builds. The cleaning rules have been made guarded so they shouldn't go +awry (via ``rm -f $(UNSET_VARIABLE)/*`` which will attempt to erase the entire +directory structure). 
+ +``clean-local`` +--------------- + +This target does the same thing as ``clean`` but only for the current (local) +directory. + +``dist`` +-------- + +This target builds a distribution tarball. It first builds the entire project +using the ``all`` target and then tars up the necessary files and compresses +it. The generated tarball is sufficient for a casual source distribution, but +probably not for a release (see ``dist-check``). + +``dist-check`` +-------------- + +This target does the same thing as the ``dist`` target but also checks the +distribution tarball. The check is made by unpacking the tarball to a new +directory, configuring it, building it, installing it, and then verifying that +the installation results are correct (by comparing to the original build). This +target can take a long time to run but should be done before a release goes out +to make sure that the distributed tarball can actually be built into a working +release. + +``dist-clean`` +-------------- + +This is a special form of the ``clean`` target. It performs a normal +``clean`` but also removes things pertaining to building the distribution. + +``install`` +----------- + +This target finalizes shared objects and executables and copies all libraries, +headers, executables and documentation to the directory given with the +``--prefix`` option to ``configure``. When completed, the prefix directory will +have everything needed to **use** LLVM. + +The LLVM makefiles can generate complete **internal** documentation for all the +classes by using ``doxygen``. By default, this feature is **not** enabled +because it takes a long time and generates a massive amount of data (>100MB). If +you want this feature, you must configure LLVM with the --enable-doxygen switch +and ensure that a modern version of doxygen (1.3.7 or later) is available in +your ``PATH``. You can download doxygen from `here +<http://www.stack.nl/~dimitri/doxygen/download.html#latestsrc>`_. 
+ +``preconditions`` +----------------- + +This utility target checks to see if the ``Makefile`` in the object directory is +older than the ``Makefile`` in the source directory and copies it if so. It also +reruns the ``configure`` script if that needs to be done and rebuilds the +``Makefile.config`` file similarly. Users may overload this target to ensure +that sanity checks are run *before* any building of targets as all the targets +depend on ``preconditions``. + +``printvars`` +------------- + +This utility target just causes the LLVM makefiles to print out some of the +makefile variables so that you can double check how things are set. + +``reconfigure`` +--------------- + +This utility target will force a reconfigure of LLVM or your project. It simply +runs ``$(PROJ_OBJ_ROOT)/config.status --recheck`` to rerun the configuration +tests and rebuild the configured files. This isn't generally useful as the +makefiles will reconfigure themselves whenever it's necessary. + +``spotless`` +------------ + +.. warning:: + + Use with caution! + +This utility target, only available when ``$(PROJ_OBJ_ROOT)`` is not the same as +``$(PROJ_SRC_ROOT)``, will completely clean the ``$(PROJ_OBJ_ROOT)`` directory +by removing its content entirely and reconfiguring the directory. This returns +the ``$(PROJ_OBJ_ROOT)`` directory to a completely fresh state. All content in +the directory except configured files and top-level makefiles will be lost. + +``tags`` +-------- + +This target will generate a ``TAGS`` file in the top-level source directory. It +is meant for use with emacs, XEmacs, or ViM. The TAGS file provides an index of +symbol definitions so that the editor can jump you to the definition +quickly. + +``uninstall`` +------------- + +This target is the opposite of the ``install`` target. It removes the header, +library and executable files from the installation directories. 
Note that the +directories themselves are not removed because it is not guaranteed that LLVM is +the only thing installing there (e.g. ``--prefix=/usr``). + +.. _variables: + +Variables +========= + +Variables are used to tell the LLVM Makefile System what to do and to obtain +information from it. Variables are also used internally by the LLVM Makefile +System. Variable names that contain only the upper case alphabetic letters and +underscore are intended for use by the end user. All other variables are +internal to the LLVM Makefile System and should not be relied upon nor +modified. The sections below describe how to use the LLVM Makefile +variables. + +Control Variables +----------------- + +Variables listed in the table below should be set *before* the inclusion of +`$(LEVEL)/Makefile.common`_. These variables provide input to the LLVM make +system that tell it what to do for the current directory. + +``BUILD_ARCHIVE`` + If set to any value, causes an archive (.a) library to be built. + +``BUILT_SOURCES`` + Specifies a set of source files that are generated from other source + files. These sources will be built before any other target processing to + ensure they are present. + +``BYTECODE_LIBRARY`` + If set to any value, causes a bitcode library (.bc) to be built. + +``CONFIG_FILES`` + Specifies a set of configuration files to be installed. + +``DEBUG_SYMBOLS`` + If set to any value, causes the build to include debugging symbols even in + optimized objects, libraries and executables. This alters the flags + specified to the compilers and linkers. Debugging isn't fun in an optimized + build, but it is possible. + +``DIRS`` + Specifies a set of directories, usually children of the current directory, + that should also be made using the same goal. These directories will be + built serially. + +``DISABLE_AUTO_DEPENDENCIES`` + If set to any value, causes the makefiles to **not** automatically generate + dependencies when running the compiler. 
Use of this feature is discouraged + and it may be removed at a later date. + +``ENABLE_OPTIMIZED`` + If set to 1, causes the build to generate optimized objects, libraries and + executables. This alters the flags specified to the compilers and + linkers. Generally debugging won't be a fun experience with an optimized + build. + +``ENABLE_PROFILING`` + If set to 1, causes the build to generate both optimized and profiled + objects, libraries and executables. This alters the flags specified to the + compilers and linkers to ensure that profile data can be collected from the + tools built. Use the ``gprof`` tool to analyze the output from the profiled + tools (``gmon.out``). + +``DISABLE_ASSERTIONS`` + If set to 1, causes the build to disable assertions, even if building a + debug or profile build. This will exclude all assertion check code from the + build. LLVM will execute faster, but with little help when things go + wrong. + +``EXPERIMENTAL_DIRS`` + Specify a set of directories that should be built, but if they fail, it + should not cause the build to fail. Note that this should only be used + temporarily while code is being written. + +``EXPORTED_SYMBOL_FILE`` + Specifies the name of a single file that contains a list of the symbols to + be exported by the linker. One symbol per line. + +``EXPORTED_SYMBOL_LIST`` + Specifies a set of symbols to be exported by the linker. + +``EXTRA_DIST`` + Specifies additional files that should be distributed with LLVM. All source + files, all built sources, all Makefiles, and most documentation files will + be automatically distributed. Use this variable to distribute any files that + are not automatically distributed. + +``KEEP_SYMBOLS`` + If set to any value, specifies that when linking executables the makefiles + should retain debug symbols in the executable. Normally, symbols are + stripped from the executable. + +``LEVEL`` (required) + Specify the level of nesting from the top level. 
This variable must be set + in each makefile as it is used to find the top level and thus the other + makefiles. + +``LIBRARYNAME`` + Specify the name of the library to be built. (Required For Libraries) + +``LINK_COMPONENTS`` + When specified for building a tool, the value of this variable will be + passed to the ``llvm-config`` tool to generate a link line for the + tool. Unlike ``USEDLIBS`` and ``LLVMLIBS``, not all libraries need to be + specified. The ``llvm-config`` tool will figure out the library dependencies + and add any libraries that are needed. The ``USEDLIBS`` variable can still + be used in conjunction with ``LINK_COMPONENTS`` so that additional + project-specific libraries can be linked with the LLVM libraries specified + by ``LINK_COMPONENTS``. + +.. _LINK_LIBS_IN_SHARED: + +``LINK_LIBS_IN_SHARED`` + By default, shared library linking will ignore any libraries specified with + the `LLVMLIBS`_ or `USEDLIBS`_. This prevents shared libs from including + things that will be in the LLVM tool the shared library will be loaded + into. However, sometimes it is useful to link certain libraries into your + shared library and this option enables that feature. + +.. _LLVMLIBS: + +``LLVMLIBS`` + Specifies the set of libraries from the LLVM ``$(ObjDir)`` that will be + linked into the tool or library. + +``LOADABLE_MODULE`` + If set to any value, causes the shared library being built to also be a + loadable module. Loadable modules can be opened with the dlopen() function + and searched with dlsym (or the operating system's equivalent). Note that + setting this variable without also setting ``SHARED_LIBRARY`` will have no + effect. + +.. _MODULE_NAME: + +``MODULE_NAME`` + Specifies the name of a bitcode module to be created. A bitcode module can + be specified in conjunction with other kinds of library builds or by + itself. It constructs from the sources a single linked bitcode file. 
+ +``NO_INSTALL`` + Specifies that the build products of the directory should not be installed + but should be built even if the ``install`` target is given. This is handy + for directories that build libraries or tools that are only used as part of + the build process, such as code generators (e.g. ``tblgen``). + +``OPTIONAL_DIRS`` + Specify a set of directories that may be built, if they exist, but it's not + an error for them not to exist. + +``PARALLEL_DIRS`` + Specify a set of directories to build recursively and in parallel if the + ``-j`` option was used with ``make``. + +.. _SHARED_LIBRARY: + +``SHARED_LIBRARY`` + If set to any value, causes a shared library (``.so``) to be built in + addition to any other kinds of libraries. Note that this option will cause + all source files to be built twice: once with options for position + independent code and once without. Use it only where you really need a + shared library. + +``SOURCES`` (optional) + Specifies the list of source files in the current directory to be + built. Source files of any type may be specified (programs, documentation, + config files, etc.). If not specified, the makefile system will infer the + set of source files from the files present in the current directory. + +``SUFFIXES`` + Specifies a set of filename suffixes that occur in suffix match rules. Only + set this if your local ``Makefile`` specifies additional suffix match + rules. + +``TARGET`` + Specifies the name of the LLVM code generation target that the current + directory builds. Setting this variable enables additional rules to build + ``.inc`` files from ``.td`` files. + +.. _TESTSUITE: + +``TESTSUITE`` + Specifies the directory of tests to run in ``llvm/test``. + +``TOOLNAME`` + Specifies the name of the tool that the current directory should build. + +``TOOL_VERBOSE`` + Implies ``VERBOSE`` and also tells each tool invoked to be verbose. 
This is + handy when you're trying to see the sub-tools invoked by each tool invoked + by the makefile. For example, this will pass ``-v`` to the GCC compilers + which causes it to print out the command lines it uses to invoke sub-tools + (compiler, assembler, linker). + +.. _USEDLIBS: + +``USEDLIBS`` + Specifies the list of project libraries that will be linked into the tool or + library. + +``VERBOSE`` + Tells the Makefile system to produce detailed output of what it is doing + instead of just summary comments. This will generate a LOT of output. + +Override Variables +------------------ + +Override variables can be used to override the default values provided by the +LLVM makefile system. These variables can be set in several ways: + +* In the environment (e.g. setenv, export) --- not recommended. +* On the ``make`` command line --- recommended. +* On the ``configure`` command line. +* In the Makefile (only *after* the inclusion of `$(LEVEL)/Makefile.common`_). + +The override variables are given below: + +``AR`` (defaulted) + Specifies the path to the ``ar`` tool. + +``PROJ_OBJ_DIR`` + The directory into which the products of build rules will be placed. This + might be the same as `PROJ_SRC_DIR`_ but typically is not. + +.. _PROJ_SRC_DIR: + +``PROJ_SRC_DIR`` + The directory which contains the source files to be built. + +``BUILD_EXAMPLES`` + If set to 1, build examples in ``examples`` and (if building Clang) + ``tools/clang/examples`` directories. + +``BZIP2`` (configured) + The path to the ``bzip2`` tool. + +``CC`` (configured) + The path to the 'C' compiler. + +``CFLAGS`` + Additional flags to be passed to the 'C' compiler. + +``CXX`` + Specifies the path to the C++ compiler. + +``CXXFLAGS`` + Additional flags to be passed to the C++ compiler. + +``DATE`` (configured) + Specifies the path to the ``date`` program or any program that can generate + the current date and time on its standard output. 
+ +``DOT`` (configured) + Specifies the path to the ``dot`` tool or ``false`` if there isn't one. + +``ECHO`` (configured) + Specifies the path to the ``echo`` tool for printing output. + +``EXEEXT`` (configured) + Provides the extension to be used on executables built by the makefiles. + The value may be empty on platforms that do not use file extensions for + executables (e.g. Unix). + +``INSTALL`` (configured) + Specifies the path to the ``install`` tool. + +``LDFLAGS`` (configured) + Allows users to specify additional flags to pass to the linker. + +``LIBS`` (configured) + The list of libraries that should be linked with each tool. + +``LIBTOOL`` (configured) + Specifies the path to the ``libtool`` tool. This tool is renamed ``mklib`` + by the ``configure`` script. + +``LLVMAS`` (defaulted) + Specifies the path to the ``llvm-as`` tool. + +``LLVMCC`` + Specifies the path to the LLVM capable compiler. + +``LLVMCXX`` + Specifies the path to the LLVM C++ capable compiler. + +``LLVMGCC`` (defaulted) + Specifies the path to the LLVM version of the GCC 'C' Compiler. + +``LLVMGXX`` (defaulted) + Specifies the path to the LLVM version of the GCC C++ Compiler. + +``LLVMLD`` (defaulted) + Specifies the path to the LLVM bitcode linker tool + +``LLVM_OBJ_ROOT`` (configured) + Specifies the top directory into which the output of the build is placed. + +``LLVM_SRC_ROOT`` (configured) + Specifies the top directory in which the sources are found. + +``LLVM_TARBALL_NAME`` (configured) + Specifies the name of the distribution tarball to create. This is configured + from the name of the project and its version number. + +``MKDIR`` (defaulted) + Specifies the path to the ``mkdir`` tool that creates directories. + +``ONLY_TOOLS`` + If set, specifies the list of tools to build. + +``PLATFORMSTRIPOPTS`` + The options to provide to the linker to specify that a stripped (no symbols) + executable should be built. + +``RANLIB`` (defaulted) + Specifies the path to the ``ranlib`` tool. 
+ +``RM`` (defaulted) + Specifies the path to the ``rm`` tool. + +``SED`` (defaulted) + Specifies the path to the ``sed`` tool. + +``SHLIBEXT`` (configured) + Provides the filename extension to use for shared libraries. + +``TBLGEN`` (defaulted) + Specifies the path to the ``tblgen`` tool. + +``TAR`` (defaulted) + Specifies the path to the ``tar`` tool. + +``ZIP`` (defaulted) + Specifies the path to the ``zip`` tool. + +Readable Variables +------------------ + +Variables listed in the table below can be used by the user's Makefile but +should not be changed. Changing the value will generally cause the build to go +wrong, so don't do it. + +``bindir`` + The directory into which executables will ultimately be installed. This + value is derived from the ``--prefix`` option given to ``configure``. + +``BuildMode`` + The name of the type of build being performed: Debug, Release, or + Profile. + +``bytecode_libdir`` + The directory into which bitcode libraries will ultimately be installed. + This value is derived from the ``--prefix`` option given to ``configure``. + +``ConfigureScriptFLAGS`` + Additional flags given to the ``configure`` script when reconfiguring. + +``DistDir`` + The *current* directory for which a distribution copy is being made. + +.. _Echo: + +``Echo`` + The LLVM Makefile System output command. This provides the ``llvm[n]`` + prefix and starts with ``@`` so the command itself is not printed by + ``make``. + +``EchoCmd`` + Same as `Echo`_ but without the leading ``@``. + +``includedir`` + The directory into which include files will ultimately be installed. This + value is derived from the ``--prefix`` option given to ``configure``. + +``libdir`` + The directory into which native libraries will ultimately be installed. + This value is derived from the ``--prefix`` option given to + ``configure``. + +``LibDir`` + The configuration specific directory into which libraries are placed before + installation. 
+ +``MakefileConfig`` + Full path of the ``Makefile.config`` file. + +``MakefileConfigIn`` + Full path of the ``Makefile.config.in`` file. + +``ObjDir`` + The configuration and directory specific directory where build objects + (compilation results) are placed. + +``SubDirs`` + The complete list of sub-directories of the current directory as + specified by other variables. + +``Sources`` + The complete list of source files. + +``sysconfdir`` + The directory into which configuration files will ultimately be + installed. This value is derived from the ``--prefix`` option given to + ``configure``. + +``ToolDir`` + The configuration specific directory into which executables are placed + before they are installed. + +``TopDistDir`` + The top most directory into which the distribution files are copied. + +``Verb`` + Use this as the first thing on your build script lines to enable or disable + verbose mode. It expands to either an ``@`` (quiet mode) or nothing (verbose + mode). + +Internal Variables +------------------ + +Variables listed below are used by the LLVM Makefile System and considered +internal. You should not use these variables under any circumstances. + +.. 
code-block:: makefile + + Archive + AR.Flags + BaseNameSources + BCCompile.C + BCCompile.CXX + BCLinkLib + C.Flags + Compile.C + CompileCommonOpts + Compile.CXX + ConfigStatusScript + ConfigureScript + CPP.Flags + CPP.Flags + CXX.Flags + DependFiles + DestArchiveLib + DestBitcodeLib + DestModule + DestSharedLib + DestTool + DistAlways + DistCheckDir + DistCheckTop + DistFiles + DistName + DistOther + DistSources + DistSubDirs + DistTarBZ2 + DistTarGZip + DistZip + ExtraLibs + FakeSources + INCFiles + InternalTargets + LD.Flags + LibName.A + LibName.BC + LibName.LA + LibName.O + LibTool.Flags + Link + LinkModule + LLVMLibDir + LLVMLibsOptions + LLVMLibsPaths + LLVMToolDir + LLVMUsedLibs + LocalTargets + Module + ObjectsBC + ObjectsLO + ObjectsO + ObjMakefiles + ParallelTargets + PreConditions + ProjLibsOptions + ProjLibsPaths + ProjUsedLibs + Ranlib + RecursiveTargets + SrcMakefiles + Strip + StripWarnMsg + TableGen + TDFiles + ToolBuildPath + TopLevelTargets + UserTargets diff --git a/docs/Packaging.html b/docs/Packaging.html deleted file mode 100644 index 51e9375e85..0000000000 --- a/docs/Packaging.html +++ /dev/null @@ -1,119 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>Advice on Packaging LLVM</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1>Advice on Packaging LLVM</h1> -<ol> - <li><a href="#overview">Overview</a></li> - <li><a href="#compilation">Compile Flags</a></li> - <li><a href="#cxx-features">C++ Features</a></li> - <li><a href="#shared-library">Shared Library</a></li> - <li><a href="#deps">Dependencies</a></li> -</ol> - -<!--=========================================================================--> -<h2><a name="overview">Overview</a></h2> -<!--=========================================================================--> -<div> - -<p>LLVM sets certain 
default configure options to make sure our developers don't -break things for constrained platforms. These settings are not optimal for most -desktop systems, and we hope that packagers (e.g., Redhat, Debian, MacPorts, -etc.) will tweak them. This document lists settings we suggest you tweak. -</p> - -<p>LLVM's API changes with each release, so users are likely to want, for -example, both LLVM-2.6 and LLVM-2.7 installed at the same time to support apps -developed against each. -</p> -</div> - -<!--=========================================================================--> -<h2><a name="compilation">Compile Flags</a></h2> -<!--=========================================================================--> -<div> - -<p>LLVM runs much more quickly when it's optimized and assertions are removed. -However, such a build is currently incompatible with users who build without -defining NDEBUG, and the lack of assertions makes it hard to debug problems in -user code. We recommend allowing users to install both optimized and debug -versions of LLVM in parallel. The following configure flags are relevant: -</p> - -<dl> - <dt><tt>--disable-assertions</tt></dt><dd>Builds LLVM with <tt>NDEBUG</tt> - defined. Changes the LLVM ABI. Also available by setting - <tt>DISABLE_ASSERTIONS=0|1</tt> in <tt>make</tt>'s environment. This defaults - to enabled regardless of the optimization setting, but it slows things - down.</dd> - - <dt><tt>--enable-debug-symbols</tt></dt><dd>Builds LLVM with <tt>-g</tt>. - Also available by setting <tt>DEBUG_SYMBOLS=0|1</tt> in <tt>make</tt>'s - environment. This defaults to disabled when optimizing, so you should turn it - back on to let users debug their programs.</dd> - - <dt><tt>--enable-optimized</tt></dt><dd>(For svn checkouts) Builds LLVM with - <tt>-O2</tt> and, by default, turns off debug symbols. Also available by - setting <tt>ENABLE_OPTIMIZED=0|1</tt> in <tt>make</tt>'s environment. 
This - defaults to enabled when not in a checkout.</dd> -</dl> -</div> - -<!--=========================================================================--> -<h2><a name="cxx-features">C++ Features</a></h2> -<!--=========================================================================--> -<div> - -<dl> - <dt>RTTI</dt><dd>LLVM disables RTTI by default. Add <tt>REQUIRES_RTTI=1</tt> - to your environment while running <tt>make</tt> to re-enable it. This will - allow users to build with RTTI enabled and still inherit from LLVM - classes.</dd> -</dl> -</div> - -<!--=========================================================================--> -<h2><a name="shared-library">Shared Library</a></h2> -<!--=========================================================================--> -<div> - -<p>Configure with <tt>--enable-shared</tt> to build -<tt>libLLVM-<var>major</var>.<var>minor</var>.(so|dylib)</tt> and link the tools -against it. This saves lots of binary size at the cost of some startup time. -</p> -</div> - -<!--=========================================================================--> -<h2><a name="deps">Dependencies</a></h2> -<!--=========================================================================--> -<div> - -<dl> -<dt><tt>--enable-libffi</tt></dt><dd>Depend on <a -href="http://sources.redhat.com/libffi/">libffi</a> to allow the LLVM -interpreter to call external functions.</dd> -<dt><tt>--with-oprofile</tt></dt><dd>Depend on <a -href="http://oprofile.sourceforge.net/doc/devel/index.html">libopagent</a> -(>=version 0.9.4) to let the LLVM JIT tell oprofile about function addresses and -line numbers.</dd> -</dl> -</div> - -<!-- *********************************************************************** --> -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - 
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> -</body> -</html> diff --git a/docs/Packaging.rst b/docs/Packaging.rst new file mode 100644 index 0000000000..6e74158d72 --- /dev/null +++ b/docs/Packaging.rst @@ -0,0 +1,75 @@ +.. _packaging: + +======================== +Advice on Packaging LLVM +======================== + +.. contents:: + :local: + +Overview +======== + +LLVM sets certain default configure options to make sure our developers don't +break things for constrained platforms. These settings are not optimal for most +desktop systems, and we hope that packagers (e.g., Redhat, Debian, MacPorts, +etc.) will tweak them. This document lists settings we suggest you tweak. + +LLVM's API changes with each release, so users are likely to want, for example, +both LLVM-2.6 and LLVM-2.7 installed at the same time to support apps developed +against each. + +Compile Flags +============= + +LLVM runs much more quickly when it's optimized and assertions are removed. +However, such a build is currently incompatible with users who build without +defining ``NDEBUG``, and the lack of assertions makes it hard to debug problems +in user code. We recommend allowing users to install both optimized and debug +versions of LLVM in parallel. The following configure flags are relevant: + +``--disable-assertions`` + Builds LLVM with ``NDEBUG`` defined. Changes the LLVM ABI. Also available + by setting ``DISABLE_ASSERTIONS=0|1`` in ``make``'s environment. This + defaults to enabled regardless of the optimization setting, but it slows + things down. + +``--enable-debug-symbols`` + Builds LLVM with ``-g``. Also available by setting ``DEBUG_SYMBOLS=0|1`` in + ``make``'s environment. This defaults to disabled when optimizing, so you + should turn it back on to let users debug their programs. 
+ +``--enable-optimized`` + (For svn checkouts) Builds LLVM with ``-O2`` and, by default, turns off + debug symbols. Also available by setting ``ENABLE_OPTIMIZED=0|1`` in + ``make``'s environment. This defaults to enabled when not in a + checkout. + +C++ Features +============ + +RTTI + LLVM disables RTTI by default. Add ``REQUIRES_RTTI=1`` to your environment + while running ``make`` to re-enable it. This will allow users to build with + RTTI enabled and still inherit from LLVM classes. + +Shared Library +============== + +Configure with ``--enable-shared`` to build +``libLLVM-<major>.<minor>.(so|dylib)`` and link the tools against it. This +saves lots of binary size at the cost of some startup time. + +Dependencies +============ + +``--enable-libffi`` + Depend on `libffi <http://sources.redhat.com/libffi/>`_ to allow the LLVM + interpreter to call external functions. + +``--with-oprofile`` + + Depend on `libopagent + <http://oprofile.sourceforge.net/doc/devel/index.html>`_ (>=version 0.9.4) + to let the LLVM JIT tell oprofile about function addresses and line + numbers. 
diff --git a/docs/Projects.html b/docs/Projects.html deleted file mode 100644 index 45f6af5711..0000000000 --- a/docs/Projects.html +++ /dev/null @@ -1,482 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>Creating an LLVM Project</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1>Creating an LLVM Project</h1> - -<ol> -<li><a href="#overview">Overview</a></li> -<li><a href="#create">Create a project from the Sample Project</a></li> -<li><a href="#source">Source tree layout</a></li> -<li><a href="#makefiles">Writing LLVM-style Makefiles</a> - <ol> - <li><a href="#reqVars">Required Variables</a></li> - <li><a href="#varsBuildDir">Variables for Building Subdirectories</a></li> - <li><a href="#varsBuildLib">Variables for Building Libraries</a></li> - <li><a href="#varsBuildProg">Variables for Building Programs</a></li> - <li><a href="#miscVars">Miscellaneous Variables</a></li> - </ol></li> -<li><a href="#objcode">Placement of object code</a></li> -<li><a href="#help">Further help</a></li> -</ol> - -<div class="doc_author"> - <p>Written by John Criswell</p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="overview">Overview</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>The LLVM build system is designed to facilitate the building of third party -projects that use LLVM header files, libraries, and tools. In order to use -these facilities, a Makefile from a project must do the following things:</p> - -<ol> - <li>Set <tt>make</tt> variables. 
There are several variables that a Makefile - needs to set to use the LLVM build system: - <ul> - <li><tt>PROJECT_NAME</tt> - The name by which your project is known.</li> - <li><tt>LLVM_SRC_ROOT</tt> - The root of the LLVM source tree.</li> - <li><tt>LLVM_OBJ_ROOT</tt> - The root of the LLVM object tree.</li> - <li><tt>PROJ_SRC_ROOT</tt> - The root of the project's source tree.</li> - <li><tt>PROJ_OBJ_ROOT</tt> - The root of the project's object tree.</li> - <li><tt>PROJ_INSTALL_ROOT</tt> - The root installation directory.</li> - <li><tt>LEVEL</tt> - The relative path from the current directory to the - project's root ($PROJ_OBJ_ROOT).</li> - </ul></li> - <li>Include <tt>Makefile.config</tt> from <tt>$(LLVM_OBJ_ROOT)</tt>.</li> - <li>Include <tt>Makefile.rules</tt> from <tt>$(LLVM_SRC_ROOT)</tt>.</li> -</ol> - -<p>There are two ways that you can set all of these variables:</p> -<ol> - <li>You can write your own Makefiles which hard-code these values.</li> - <li>You can use the pre-made LLVM sample project. This sample project - includes Makefiles, a configure script that can be used to configure the - location of LLVM, and the ability to support multiple object directories - from a single source directory.</li> -</ol> - -<p>This document assumes that you will base your project on the LLVM sample -project found in <tt>llvm/projects/sample</tt>. If you want to devise your own -build system, studying the sample project and LLVM Makefiles will probably -provide enough information on how to write your own Makefiles.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="create">Create a Project from the Sample Project</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Follow these simple steps to start your project:</p> - -<ol> -<li>Copy the <tt>llvm/projects/sample</tt> directory to any place of your -choosing. You can place it anywhere you like. 
Rename the directory to match -the name of your project.</li> - -<li> -If you downloaded LLVM using Subversion, remove all the directories named .svn -(and all the files therein) from your project's new source tree. This will -keep Subversion from thinking that your project is inside -<tt>llvm/trunk/projects/sample</tt>.</li> - -<li>Add your source code and Makefiles to your source tree.</li> - -<li>If you want your project to be configured with the <tt>configure</tt> script -then you need to edit <tt>autoconf/configure.ac</tt> as follows: - <ul> - <li><b>AC_INIT</b>. Place the name of your project, its version number and - a contact email address for your project as the arguments to this macro</li> - <li><b>AC_CONFIG_AUX_DIR</b>. If your project isn't in the - <tt>llvm/projects</tt> directory then you might need to adjust this so that - it specifies a relative path to the <tt>llvm/autoconf</tt> directory.</li> - <li><b>LLVM_CONFIG_PROJECT</b>. Just leave this alone.</li> - <li><b>AC_CONFIG_SRCDIR</b>. Specify a path to a file name that identifies - your project; or just leave it at <tt>Makefile.common.in</tt></li> - <li><b>AC_CONFIG_FILES</b>. Do not change.</li> - <li><b>AC_CONFIG_MAKEFILE</b>. Use one of these macros for each Makefile - that your project uses. This macro arranges for your makefiles to be copied - from the source directory, unmodified, to the build directory.</li> - </ul> -</li> - -<li>After updating <tt>autoconf/configure.ac</tt>, regenerate the -configure script with these commands: - -<div class="doc_code"> -<p><tt>% cd autoconf<br> - % ./AutoRegen.sh</tt></p> -</div> - -<p>You must be using Autoconf version 2.59 or later and your aclocal version -should be 1.9 or later.</p></li> - -<li>Run <tt>configure</tt> in the directory in which you want to place -object code. 
Use the following options to tell your project where it -can find LLVM: - - <dl> - <dt><tt>--with-llvmsrc=<directory></tt></dt> - <dd>Tell your project where the LLVM source tree is located.</dd> - <dt><br><tt>--with-llvmobj=<directory></tt></dt> - <dd>Tell your project where the LLVM object tree is located.</dd> - <dt><br><tt>--prefix=<directory></tt></dt> - <dd>Tell your project where it should get installed.</dd> - </dl> -</ol> - -<p>That's it! Now all you have to do is type <tt>gmake</tt> (or <tt>make</tt> -if your on a GNU/Linux system) in the root of your object directory, and your -project should build.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="source">Source Tree Layout</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>In order to use the LLVM build system, you will want to organize your -source code so that it can benefit from the build system's features. -Mainly, you want your source tree layout to look similar to the LLVM -source tree layout. The best way to do this is to just copy the -project tree from <tt>llvm/projects/sample</tt> and modify it to meet -your needs, but you can certainly add to it if you want.</p> - -<p>Underneath your top level directory, you should have the following -directories:</p> - -<dl> - <dt><b>lib</b> - <dd> - This subdirectory should contain all of your library source - code. For each library that you build, you will have one - directory in <b>lib</b> that will contain that library's source - code. - - <p> - Libraries can be object files, archives, or dynamic libraries. - The <b>lib</b> directory is just a convenient place for libraries - as it places them all in a directory from which they can be linked - later. - - <dt><b>include</b> - <dd> - This subdirectory should contain any header files that are - global to your project. 
By global, we mean that they are used - by more than one library or executable of your project. - <p> - By placing your header files in <b>include</b>, they will be - found automatically by the LLVM build system. For example, if - you have a file <b>include/jazz/note.h</b>, then your source - files can include it simply with <b>#include "jazz/note.h"</b>. - - <dt><b>tools</b> - <dd> - This subdirectory should contain all of your source - code for executables. For each program that you build, you - will have one directory in <b>tools</b> that will contain that - program's source code. - <p> - - <dt><b>test</b> - <dd> - This subdirectory should contain tests that verify that your code - works correctly. Automated tests are especially useful. - <p> - Currently, the LLVM build system provides basic support for tests. - The LLVM system provides the following: - <ul> - <li> - LLVM provides a tcl procedure that is used by Dejagnu to run - tests. It can be found in <tt>llvm/lib/llvm-dg.exp</tt>. This - test procedure uses RUN lines in the actual test case to determine - how to run the test. See the <a - href="TestingGuide.html">TestingGuide</a> for more details. You - can easily write Makefile support similar to the Makefiles in - <tt>llvm/test</tt> to use Dejagnu to run your project's tests.<br></li> - <li> - LLVM contains an optional package called <tt>llvm-test</tt> - which provides benchmarks and programs that are known to compile with the - LLVM GCC front ends. You can use these - programs to test your code, gather statistics information, and - compare it to the current LLVM performance statistics. - <br>Currently, there is no way to hook your tests directly into the - <tt>llvm/test</tt> testing harness. You will simply - need to find a way to use the source provided within that directory - on your own. 
- </ul> -</dl> - -<p>Typically, you will want to build your <b>lib</b> directory first followed by -your <b>tools</b> directory.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="makefiles">Writing LLVM Style Makefiles</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The LLVM build system provides a convenient way to build libraries and -executables. Most of your project Makefiles will only need to define a few -variables. Below is a list of the variables one can set and what they can -do:</p> - -<!-- ======================================================================= --> -<h3> - <a name="reqVars">Required Variables</a> -</h3> - -<div> - -<dl> - <dt>LEVEL - <dd> - This variable is the relative path from this Makefile to the - top directory of your project's source code. For example, if - your source code is in <tt>/tmp/src</tt>, then the Makefile in - <tt>/tmp/src/jump/high</tt> would set <tt>LEVEL</tt> to <tt>"../.."</tt>. -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="varsBuildDir">Variables for Building Subdirectories</a> -</h3> - -<div> - -<dl> - <dt>DIRS - <dd> - This is a space separated list of subdirectories that should be - built. They will be built, one at a time, in the order - specified. - <p> - - <dt>PARALLEL_DIRS - <dd> - This is a list of directories that can be built in parallel. - These will be built after the directories in DIRS have been - built. - <p> - - <dt>OPTIONAL_DIRS - <dd> - This is a list of directories that can be built if they exist, - but will not cause an error if they do not exist. They are - built serially in the order in which they are listed. 
-</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="varsBuildLib">Variables for Building Libraries</a> -</h3> - -<div> - -<dl> - <dt>LIBRARYNAME - <dd> - This variable contains the base name of the library that will - be built. For example, to build a library named - <tt>libsample.a</tt>, LIBRARYNAME should be set to - <tt>sample</tt>. - <p> - - <dt>BUILD_ARCHIVE - <dd> - By default, a library is a <tt>.o</tt> file that is linked - directly into a program. To build an archive (also known as - a static library), set the BUILD_ARCHIVE variable. - <p> - - <dt>SHARED_LIBRARY - <dd> - If SHARED_LIBRARY is defined in your Makefile, a shared - (or dynamic) library will be built. -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="varsBuildProg">Variables for Building Programs</a> -</h3> - -<div> - -<dl> - <dt>TOOLNAME - <dd> - This variable contains the name of the program that will - be built. For example, to build an executable named - <tt>sample</tt>, TOOLNAME should be set to <tt>sample</tt>. - <p> - - <dt>USEDLIBS - <dd> - This variable holds a space separated list of libraries that should - be linked into the program. These libraries must be libraries that - come from your <b>lib</b> directory. The libraries must be - specified without their "lib" prefix. For example, to link - libsample.a, you would set USEDLIBS to - <tt>sample.a</tt>. - <p> - Note that this works only for statically linked libraries. - <p> - - <dt>LLVMLIBS - <dd> - This variable holds a space separated list of libraries that should - be linked into the program. These libraries must be LLVM libraries. - The libraries must be specified without their "lib" prefix. 
For - example, to link with a driver that performs an IR transformation - you might set LLVMLIBS to this minimal set of libraries - <tt>LLVMSupport.a LLVMCore.a LLVMBitReader.a LLVMAsmParser.a LLVMAnalysis.a LLVMTransformUtils.a LLVMScalarOpts.a LLVMTarget.a</tt>. - <p> - Note that this works only for statically linked libraries. LLVM is - split into a large number of static libraries, and the list of libraries you - require may be much longer than the list above. To see a full list - of libraries use: - <tt>llvm-config --libs all</tt>. - Using LINK_COMPONENTS as described below, obviates the need to set LLVMLIBS. - <p> - - <dt>LINK_COMPONENTS - <dd>This variable holds a space separated list of components that - the LLVM Makefiles pass to the <tt>llvm-config</tt> tool to generate - a link line for the program. For example, to link with all LLVM - libraries use - <tt>LINK_COMPONENTS = all</tt>. - <p> - - <dt>LIBS - <dd> - To link dynamic libraries, add <tt>-l<library base name></tt> to - the LIBS variable. The LLVM build system will look in the same places - for dynamic libraries as it does for static libraries. - <p> - For example, to link <tt>libsample.so</tt>, you would have the - following line in your <tt>Makefile</tt>: - <p> - <tt> - LIBS += -lsample - </tt> - <p> - Note that LIBS must occur in the Makefile after the inclusion of Makefile.common. - <p> -</dl> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="miscVars">Miscellaneous Variables</a> -</h3> - -<div> - -<dl> - <dt>CFLAGS - <dt>CPPFLAGS - <dd> - This variable can be used to add options to the C and C++ - compiler, respectively. It is typically used to add options - that tell the compiler the location of additional directories - to search for header files. - <p> - It is highly suggested that you append to CFLAGS and CPPFLAGS as - opposed to overwriting them. 
The master Makefiles may already - have useful options in them that you may not want to overwrite. - <p> -</dl> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="objcode">Placement of Object Code</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The final location of built libraries and executables will depend upon -whether you do a Debug, Release, or Profile build.</p> - -<dl> - <dt>Libraries - <dd> - All libraries (static and dynamic) will be stored in - <tt>PROJ_OBJ_ROOT/<type>/lib</tt>, where type is <tt>Debug</tt>, - <tt>Release</tt>, or <tt>Profile</tt> for a debug, optimized, or - profiled build, respectively.<p> - - <dt>Executables - <dd>All executables will be stored in - <tt>PROJ_OBJ_ROOT/<type>/bin</tt>, where type is <tt>Debug</tt>, - <tt>Release</tt>, or <tt>Profile</tt> for a debug, optimized, or profiled - build, respectively. -</dl> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="help">Further Help</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>If you have any questions or need any help creating an LLVM project, -the LLVM team would be more than happy to help. 
You can always post your -questions to the <a -href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developers -Mailing List</a>.</p> - -</div> - -<!-- *********************************************************************** --> -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:criswell@uiuc.edu">John Criswell</a><br> - <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a> - <br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/Projects.rst b/docs/Projects.rst new file mode 100644 index 0000000000..c4be6da5a0 --- /dev/null +++ b/docs/Projects.rst @@ -0,0 +1,327 @@ +.. _projects: + +======================== +Creating an LLVM Project +======================== + +.. contents:: + :local: + +Overview +======== + +The LLVM build system is designed to facilitate the building of third party +projects that use LLVM header files, libraries, and tools. In order to use +these facilities, a ``Makefile`` from a project must do the following things: + +* Set ``make`` variables. There are several variables that a ``Makefile`` needs + to set to use the LLVM build system: + + * ``PROJECT_NAME`` - The name by which your project is known. + * ``LLVM_SRC_ROOT`` - The root of the LLVM source tree. + * ``LLVM_OBJ_ROOT`` - The root of the LLVM object tree. + * ``PROJ_SRC_ROOT`` - The root of the project's source tree. + * ``PROJ_OBJ_ROOT`` - The root of the project's object tree. + * ``PROJ_INSTALL_ROOT`` - The root installation directory. + * ``LEVEL`` - The relative path from the current directory to the + project's root ``($PROJ_OBJ_ROOT)``. + +* Include ``Makefile.config`` from ``$(LLVM_OBJ_ROOT)``. + +* Include ``Makefile.rules`` from ``$(LLVM_SRC_ROOT)``. 
+ +There are two ways that you can set all of these variables: + +* You can write your own ``Makefiles`` which hard-code these values. + +* You can use the pre-made LLVM sample project. This sample project includes + ``Makefiles``, a configure script that can be used to configure the location + of LLVM, and the ability to support multiple object directories from a single + source directory. + +This document assumes that you will base your project on the LLVM sample project +found in ``llvm/projects/sample``. If you want to devise your own build system, +studying the sample project and LLVM ``Makefiles`` will probably provide enough +information on how to write your own ``Makefiles``. + +Create a Project from the Sample Project +======================================== + +Follow these simple steps to start your project: + +1. Copy the ``llvm/projects/sample`` directory to any place of your choosing. + You can place it anywhere you like. Rename the directory to match the name + of your project. + +2. If you downloaded LLVM using Subversion, remove all the directories named + ``.svn`` (and all the files therein) from your project's new source tree. + This will keep Subversion from thinking that your project is inside + ``llvm/trunk/projects/sample``. + +3. Add your source code and Makefiles to your source tree. + +4. If you want your project to be configured with the ``configure`` script then + you need to edit ``autoconf/configure.ac`` as follows: + + * **AC_INIT** - Place the name of your project, its version number and a + contact email address for your project as the arguments to this macro + + * **AC_CONFIG_AUX_DIR** - If your project isn't in the ``llvm/projects`` + directory then you might need to adjust this so that it specifies a + relative path to the ``llvm/autoconf`` directory. + + * **LLVM_CONFIG_PROJECT** - Just leave this alone. 
+ + * **AC_CONFIG_SRCDIR** - Specify a path to a file name that identifies your + project; or just leave it at ``Makefile.common.in``. + + * **AC_CONFIG_FILES** - Do not change. + + * **AC_CONFIG_MAKEFILE** - Use one of these macros for each Makefile that + your project uses. This macro arranges for your makefiles to be copied from + the source directory, unmodified, to the build directory. + +5. After updating ``autoconf/configure.ac``, regenerate the configure script + with these commands. (You must be using ``Autoconf`` version 2.59 or later + and your ``aclocal`` version should be 1.9 or later.) + + .. code-block:: bash + + % cd autoconf + % ./AutoRegen.sh + +6. Run ``configure`` in the directory in which you want to place object code. + Use the following options to tell your project where it can find LLVM: + + ``--with-llvmsrc=<directory>`` + Tell your project where the LLVM source tree is located. + + ``--with-llvmobj=<directory>`` + Tell your project where the LLVM object tree is located. + + ``--prefix=<directory>`` + Tell your project where it should get installed. + +That's it! Now all you have to do is type ``gmake`` (or ``make`` if you're on a +GNU/Linux system) in the root of your object directory, and your project should +build. + +Source Tree Layout +================== + +In order to use the LLVM build system, you will want to organize your source +code so that it can benefit from the build system's features. Mainly, you want +your source tree layout to look similar to the LLVM source tree layout. The +best way to do this is to just copy the project tree from +``llvm/projects/sample`` and modify it to meet your needs, but you can certainly +add to it if you want. + +Underneath your top level directory, you should have the following directories: + +**lib** + + This subdirectory should contain all of your library source code. For each + library that you build, you will have one directory in **lib** that will + contain that library's source code.
+ + Libraries can be object files, archives, or dynamic libraries. The **lib** + directory is just a convenient place for libraries as it places them all in + a directory from which they can be linked later. + +**include** + + This subdirectory should contain any header files that are global to your + project. By global, we mean that they are used by more than one library or + executable of your project. + + By placing your header files in **include**, they will be found + automatically by the LLVM build system. For example, if you have a file + **include/jazz/note.h**, then your source files can include it simply with + **#include "jazz/note.h"**. + +**tools** + + This subdirectory should contain all of your source code for executables. + For each program that you build, you will have one directory in **tools** + that will contain that program's source code. + +**test** + + This subdirectory should contain tests that verify that your code works + correctly. Automated tests are especially useful. + + Currently, the LLVM build system provides basic support for tests. The LLVM + system provides the following: + +* LLVM provides a ``tcl`` procedure that is used by ``Dejagnu`` to run tests. + It can be found in ``llvm/lib/llvm-dg.exp``. This test procedure uses ``RUN`` + lines in the actual test case to determine how to run the test. See the + `TestingGuide <TestingGuide.html>`_ for more details. You can easily write + Makefile support similar to the Makefiles in ``llvm/test`` to use ``Dejagnu`` + to run your project's tests. + +* LLVM contains an optional package called ``llvm-test``, which provides + benchmarks and programs that are known to compile with the Clang front + end. You can use these programs to test your code, gather statistical + information, and compare it to the current LLVM performance statistics. + + Currently, there is no way to hook your tests directly into the ``llvm/test`` + testing harness. 
You will simply need to find a way to use the source + provided within that directory on your own. + +Typically, you will want to build your **lib** directory first followed by your +**tools** directory. + +Writing LLVM Style Makefiles +============================ + +The LLVM build system provides a convenient way to build libraries and +executables. Most of your project Makefiles will only need to define a few +variables. Below is a list of the variables one can set and what they can +do: + +Required Variables +------------------ + +``LEVEL`` + + This variable is the relative path from this ``Makefile`` to the top + directory of your project's source code. For example, if your source code + is in ``/tmp/src``, then the ``Makefile`` in ``/tmp/src/jump/high`` + would set ``LEVEL`` to ``"../.."``. + +Variables for Building Subdirectories +------------------------------------- + +``DIRS`` + + This is a space separated list of subdirectories that should be built. They + will be built, one at a time, in the order specified. + +``PARALLEL_DIRS`` + + This is a list of directories that can be built in parallel. These will be + built after the directories in DIRS have been built. + +``OPTIONAL_DIRS`` + + This is a list of directories that can be built if they exist, but will not + cause an error if they do not exist. They are built serially in the order + in which they are listed. + +Variables for Building Libraries +-------------------------------- + +``LIBRARYNAME`` + + This variable contains the base name of the library that will be built. For + example, to build a library named ``libsample.a``, ``LIBRARYNAME`` should + be set to ``sample``. + +``BUILD_ARCHIVE`` + + By default, a library is a ``.o`` file that is linked directly into a + program. To build an archive (also known as a static library), set the + ``BUILD_ARCHIVE`` variable. + +``SHARED_LIBRARY`` + + If ``SHARED_LIBRARY`` is defined in your Makefile, a shared (or dynamic) + library will be built. 
+ +Variables for Building Programs +------------------------------- + +``TOOLNAME`` + + This variable contains the name of the program that will be built. For + example, to build an executable named ``sample``, ``TOOLNAME`` should be set + to ``sample``. + +``USEDLIBS`` + + This variable holds a space separated list of libraries that should be + linked into the program. These libraries must be libraries that come from + your **lib** directory. The libraries must be specified without their + ``lib`` prefix. For example, to link ``libsample.a``, you would set + ``USEDLIBS`` to ``sample.a``. + + Note that this works only for statically linked libraries. + +``LLVMLIBS`` + + This variable holds a space separated list of libraries that should be + linked into the program. These libraries must be LLVM libraries. The + libraries must be specified without their ``lib`` prefix. For example, to + link with a driver that performs an IR transformation you might set + ``LLVMLIBS`` to this minimal set of libraries ``LLVMSupport.a LLVMCore.a + LLVMBitReader.a LLVMAsmParser.a LLVMAnalysis.a LLVMTransformUtils.a + LLVMScalarOpts.a LLVMTarget.a``. + + Note that this works only for statically linked libraries. LLVM is split + into a large number of static libraries, and the list of libraries you + require may be much longer than the list above. To see a full list of + libraries use: ``llvm-config --libs all``. Using ``LINK_COMPONENTS``, as + described below, obviates the need to set ``LLVMLIBS``. + +``LINK_COMPONENTS`` + + This variable holds a space separated list of components that the LLVM + ``Makefiles`` pass to the ``llvm-config`` tool to generate a link line for + the program. For example, to link with all LLVM libraries use + ``LINK_COMPONENTS = all``. + +``LIBS`` + + To link dynamic libraries, add ``-l<library base name>`` to the ``LIBS`` + variable. The LLVM build system will look in the same places for dynamic + libraries as it does for static libraries.
+ + For example, to link ``libsample.so``, you would have the following line in + your ``Makefile``: + + .. code-block:: makefile + + LIBS += -lsample + +Note that ``LIBS`` must occur in the Makefile after the inclusion of +``Makefile.common``. + +Miscellaneous Variables +----------------------- + +``CFLAGS`` & ``CPPFLAGS`` + + These variables can be used to add options to the C and C++ compilers, + respectively. It is typically used to add options that tell the compiler + the location of additional directories to search for header files. + + It is highly suggested that you append to ``CFLAGS`` and ``CPPFLAGS`` as + opposed to overwriting them. The master ``Makefiles`` may already have + useful options in them that you may not want to overwrite. + +Placement of Object Code +======================== + +The final location of built libraries and executables will depend upon whether +you do a ``Debug``, ``Release``, or ``Profile`` build. + +Libraries + + All libraries (static and dynamic) will be stored in + ``PROJ_OBJ_ROOT/<type>/lib``, where *type* is ``Debug``, ``Release``, or + ``Profile`` for a debug, optimized, or profiled build, respectively. + +Executables + + All executables will be stored in ``PROJ_OBJ_ROOT/<type>/bin``, where *type* + is ``Debug``, ``Release``, or ``Profile`` for a debug, optimized, or + profiled build, respectively. + +Further Help +============ + +If you have any questions or need any help creating an LLVM project, the LLVM +team would be more than happy to help. You can always post your questions to +the `LLVM Developers Mailing List +<http://lists.cs.uiuc.edu/pipermail/llvmdev/>`_.
diff --git a/docs/SegmentedStacks.html b/docs/SegmentedStacks.html deleted file mode 100644 index 052003bc63..0000000000 --- a/docs/SegmentedStacks.html +++ /dev/null @@ -1,93 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> -<html> - <head> - <title>Segmented Stacks in LLVM</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> - <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> - </head> - - <body> - <h1>Segmented Stacks in LLVM</h1> - <div class="doc_author"> - <p>Written by <a href="mailto:sanjoy@playingwithpointers.com">Sanjoy Das</a></p> - </div> - - <ol> - <li><a href="#intro">Introduction</a></li> - <li><a href="#implementation">Implementation Details</a> - <ol> - <li><a href="#morestack">Allocating Stacklets</a></li> - <li><a href="#alloca">Variable Sized Allocas</a></li> - </ol> - </li> - </ol> - - <h2><a name="intro">Introduction</a></h2> - <div> - <p> - Segmented stack allows stack space to be allocated incrementally than as a monolithic chunk (of some worst case size) at thread initialization. This is done by allocating stack blocks (henceforth called <em>stacklets</em>) and linking them into a doubly linked list. The function prologue is responsible for checking if the current stacklet has enough space for the function to execute; and if not, call into the libgcc runtime to allocate more stack space. When using <tt>llc</tt>, segmented stacks can be enabled by adding <tt>-segmented-stacks</tt> to the command line. - </p> - <p> - The runtime functionality is <a href="http://gcc.gnu.org/wiki/SplitStacks">already there in libgcc</a>. - </p> - </div> - - <h2><a name="implementation">Implementation Details</a></h2> - <div> - <h3><a name="morestack">Allocating Stacklets</a></h3> - <div> - <p> - As mentioned above, the function prologue checks if the current stacklet has enough space. 
The current approach is to use a slot in the TCB to store the current stack limit (minus the amount of space needed to allocate a new block) - this slot's offset is again dictated by <code>libgcc</code>. The generated assembly looks like this on x86-64: - </p> - <pre> - leaq -8(%rsp), %r10 - cmpq %fs:112, %r10 - jg .LBB0_2 - - # More stack space needs to be allocated - movabsq $8, %r10 # The amount of space needed - movabsq $0, %r11 # The total size of arguments passed on stack - callq __morestack - ret # The reason for this extra return is explained below - .LBB0_2: - # Usual prologue continues here - </pre> - <p> - The size of function arguments on the stack needs to be passed to <code> __morestack</code> (this function is implemented in <code>libgcc</code>) since that number of bytes has to be copied from the previous stacklet to the current one. This is so that SP (and FP) relative addressing of function arguments work as expected. - </p> - <p> - The unusual <code>ret</code> is needed to have the function which made a call to <code>__morestack</code> return correctly. <code>__morestack</code>, instead of returning, calls into <code>.LBB0_2</code>. This is possible since both, the size of the <code>ret</code> instruction and the PC of call to <code>__morestack</code> are known. When the function body returns, control is transferred back to <code>__morestack</code>. <code>__morestack</code> then de-allocates the new stacklet, restores the correct SP value, and does a second return, which returns control to the correct caller. - </p> - </div> - - <h3><a name="alloca">Variable Sized Allocas</a></h3> - <div> - <p> - The section on <a href="#morestack">allocating stacklets</a> automatically assumes that every stack frame will be of fixed size. However, LLVM allows the use of the <code>llvm.alloca</code> intrinsic to allocate dynamically sized blocks of memory on the stack. 
When faced with such a variable-sized alloca, code is generated to - </p> - <ul> - <li>Check if the current stacklet has enough space. If yes, just bump the SP, like in the normal case.</li> - <li>If not, generate a call to <code>libgcc</code>, which allocates the memory from the heap.</li> - </ul> - <p> - The memory allocated from the heap is linked into a list in the current stacklet, and freed along with the same. This prevents a memory leak. - </p> - </div> - - </div> - - <hr> - <address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"> - <img src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"> - </a> - <a href="http://validator.w3.org/check/referer"> - <img src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> - </a> - <a href="mailto:sanjoy@playingwithpointers.com">Sanjoy Das</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ - </address> - </body> -</html> - diff --git a/docs/SegmentedStacks.rst b/docs/SegmentedStacks.rst new file mode 100644 index 0000000000..f97d62abda --- /dev/null +++ b/docs/SegmentedStacks.rst @@ -0,0 +1,80 @@ +.. _segmented_stacks: + +======================== +Segmented Stacks in LLVM +======================== + +.. contents:: + :local: + +Introduction +============ + +Segmented stack allows stack space to be allocated incrementally rather than as a +monolithic chunk (of some worst case size) at thread initialization. This is +done by allocating stack blocks (henceforth called *stacklets*) and linking them +into a doubly linked list. The function prologue is responsible for checking if +the current stacklet has enough space for the function to execute; and if not, +call into the libgcc runtime to allocate more stack space. When using ``llc``, +segmented stacks can be enabled by adding ``-segmented-stacks`` to the command +line. + +The runtime functionality is `already there in libgcc +<http://gcc.gnu.org/wiki/SplitStacks>`_.
+ +Implementation Details +====================== + +.. _allocating stacklets: + +Allocating Stacklets +-------------------- + +As mentioned above, the function prologue checks if the current stacklet has +enough space. The current approach is to use a slot in the TCB to store the +current stack limit (minus the amount of space needed to allocate a new block) - +this slot's offset is again dictated by ``libgcc``. The generated +assembly looks like this on x86-64: + +.. code-block:: nasm + + leaq -8(%rsp), %r10 + cmpq %fs:112, %r10 + jg .LBB0_2 + + # More stack space needs to be allocated + movabsq $8, %r10 # The amount of space needed + movabsq $0, %r11 # The total size of arguments passed on stack + callq __morestack + ret # The reason for this extra return is explained below + .LBB0_2: + # Usual prologue continues here + +The size of function arguments on the stack needs to be passed to +``__morestack`` (this function is implemented in ``libgcc``) since that number +of bytes has to be copied from the previous stacklet to the current one. This is +so that SP (and FP) relative addressing of function arguments work as expected. + +The unusual ``ret`` is needed to have the function which made a call to +``__morestack`` return correctly. ``__morestack``, instead of returning, calls +into ``.LBB0_2``. This is possible since both the size of the ``ret`` +instruction and the PC of the call to ``__morestack`` are known. When the function +body returns, control is transferred back to ``__morestack``. ``__morestack`` +then de-allocates the new stacklet, restores the correct SP value, and does a +second return, which returns control to the correct caller. + +Variable Sized Allocas +---------------------- + +The section on `allocating stacklets`_ automatically assumes that every stack +frame will be of fixed size. However, LLVM allows the use of the ``llvm.alloca`` +intrinsic to allocate dynamically sized blocks of memory on the stack.
When +faced with such a variable-sized alloca, code is generated to: + +* Check if the current stacklet has enough space. If yes, just bump the SP, like + in the normal case. +* If not, generate a call to ``libgcc``, which allocates the memory from the + heap. + +The memory allocated from the heap is linked into a list in the current +stacklet, and freed along with the same. This prevents a memory leak. diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html deleted file mode 100644 index 5490eebb4f..0000000000 --- a/docs/TableGenFundamentals.html +++ /dev/null @@ -1,978 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>TableGen Fundamentals</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1>TableGen Fundamentals</h1> - -<div> -<ul> - <li><a href="#introduction">Introduction</a> - <ol> - <li><a href="#concepts">Basic concepts</a></li> - <li><a href="#example">An example record</a></li> - <li><a href="#running">Running TableGen</a></li> - </ol></li> - <li><a href="#syntax">TableGen syntax</a> - <ol> - <li><a href="#primitives">TableGen primitives</a> - <ol> - <li><a href="#comments">TableGen comments</a></li> - <li><a href="#types">The TableGen type system</a></li> - <li><a href="#values">TableGen values and expressions</a></li> - </ol></li> - <li><a href="#classesdefs">Classes and definitions</a> - <ol> - <li><a href="#valuedef">Value definitions</a></li> - <li><a href="#recordlet">'let' expressions</a></li> - <li><a href="#templateargs">Class template arguments</a></li> - <li><a href="#multiclass">Multiclass definitions and instances</a></li> - </ol></li> - <li><a href="#filescope">File scope entities</a> - <ol> - <li><a href="#include">File inclusion</a></li> - <li><a href="#globallet">'let' expressions</a></li> - <li><a href="#foreach">'foreach' 
blocks</a></li> - </ol></li> - </ol></li> - <li><a href="#backends">TableGen backends</a> - <ol> - <li><a href="#">todo</a></li> - </ol></li> -</ul> -</div> - -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p> -</div> - -<!-- *********************************************************************** --> -<h2><a name="introduction">Introduction</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>TableGen's purpose is to help a human develop and maintain records of -domain-specific information. Because there may be a large number of these -records, it is specifically designed to allow writing flexible descriptions and -for common features of these records to be factored out. This reduces the -amount of duplication in the description, reduces the chance of error, and -makes it easier to structure domain specific information.</p> - -<p>The core part of TableGen <a href="#syntax">parses a file</a>, instantiates -the declarations, and hands the result off to a domain-specific "<a -href="#backends">TableGen backend</a>" for processing. The current major user -of TableGen is the <a href="CodeGenerator.html">LLVM code generator</a>.</p> - -<p>Note that if you work on TableGen much, and use emacs or vim, that you can -find an emacs "TableGen mode" and a vim language file in the -<tt>llvm/utils/emacs</tt> and <tt>llvm/utils/vim</tt> directories of your LLVM -distribution, respectively.</p> - -<!-- ======================================================================= --> -<h3><a name="concepts">Basic concepts</a></h3> - -<div> - -<p>TableGen files consist of two key parts: 'classes' and 'definitions', both -of which are considered 'records'.</p> - -<p><b>TableGen records</b> have a unique name, a list of values, and a list of -superclasses. 
The list of values is the main data that TableGen builds for each -record; it is this that holds the domain specific information for the -application. The interpretation of this data is left to a specific <a -href="#backends">TableGen backend</a>, but the structure and format rules are -taken care of and are fixed by TableGen.</p> - -<p><b>TableGen definitions</b> are the concrete form of 'records'. These -generally do not have any undefined values, and are marked with the -'<tt>def</tt>' keyword.</p> - -<p><b>TableGen classes</b> are abstract records that are used to build and -describe other records. These 'classes' allow the end-user to build -abstractions for either the domain they are targeting (such as "Register", -"RegisterClass", and "Instruction" in the LLVM code generator) or for the -implementor to help factor out common properties of records (such as "FPInst", -which is used to represent floating point instructions in the X86 backend). -TableGen keeps track of all of the classes that are used to build up a -definition, so the backend can find all definitions of a particular class, such -as "Instruction".</p> - -<p><b>TableGen multiclasses</b> are groups of abstract records that are -instantiated all at once. Each instantiation can result in multiple -TableGen definitions. If a multiclass inherits from another multiclass, -the definitions in the sub-multiclass become part of the current -multiclass, as if they were declared in the current multiclass.</p> - -</div> - -<!-- ======================================================================= --> -<h3><a name="example">An example record</a></h3> - -<div> - -<p>With no other arguments, TableGen parses the specified file and prints out -all of the classes, then all of the definitions. This is a good way to see what -the various definitions expand to fully. Running this on the <tt>X86.td</tt> -file prints this (at the time of this writing):</p> - -<div class="doc_code"> -<pre> -... 
-<b>def</b> ADD32rr { <i>// Instruction X86Inst I</i> - <b>string</b> Namespace = "X86"; - <b>dag</b> OutOperandList = (outs GR32:$dst); - <b>dag</b> InOperandList = (ins GR32:$src1, GR32:$src2); - <b>string</b> AsmString = "add{l}\t{$src2, $dst|$dst, $src2}"; - <b>list</b><dag> Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]; - <b>list</b><Register> Uses = []; - <b>list</b><Register> Defs = [EFLAGS]; - <b>list</b><Predicate> Predicates = []; - <b>int</b> CodeSize = 3; - <b>int</b> AddedComplexity = 0; - <b>bit</b> isReturn = 0; - <b>bit</b> isBranch = 0; - <b>bit</b> isIndirectBranch = 0; - <b>bit</b> isBarrier = 0; - <b>bit</b> isCall = 0; - <b>bit</b> canFoldAsLoad = 0; - <b>bit</b> mayLoad = 0; - <b>bit</b> mayStore = 0; - <b>bit</b> isImplicitDef = 0; - <b>bit</b> isConvertibleToThreeAddress = 1; - <b>bit</b> isCommutable = 1; - <b>bit</b> isTerminator = 0; - <b>bit</b> isReMaterializable = 0; - <b>bit</b> isPredicable = 0; - <b>bit</b> hasDelaySlot = 0; - <b>bit</b> usesCustomInserter = 0; - <b>bit</b> hasCtrlDep = 0; - <b>bit</b> isNotDuplicable = 0; - <b>bit</b> hasSideEffects = 0; - <b>bit</b> neverHasSideEffects = 0; - InstrItinClass Itinerary = NoItinerary; - <b>string</b> Constraints = ""; - <b>string</b> DisableEncoding = ""; - <b>bits</b><8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 }; - Format Form = MRMDestReg; - <b>bits</b><6> FormBits = { 0, 0, 0, 0, 1, 1 }; - ImmType ImmT = NoImm; - <b>bits</b><3> ImmTypeBits = { 0, 0, 0 }; - <b>bit</b> hasOpSizePrefix = 0; - <b>bit</b> hasAdSizePrefix = 0; - <b>bits</b><4> Prefix = { 0, 0, 0, 0 }; - <b>bit</b> hasREX_WPrefix = 0; - FPFormat FPForm = ?; - <b>bits</b><3> FPFormBits = { 0, 0, 0 }; -} -... -</pre> -</div> - -<p>This definition corresponds to a 32-bit register-register add instruction in -the X86. The string after the '<tt>def</tt>' string indicates the name of the -record—"<tt>ADD32rr</tt>" in this case—and the comment at the end of -the line indicates the superclasses of the definition. 
The body of the record -contains all of the data that TableGen assembled for the record, indicating that -the instruction is part of the "X86" namespace, the pattern indicating how the -the instruction should be emitted into the assembly file, that it is a -two-address instruction, has a particular encoding, etc. The contents and -semantics of the information in the record is specific to the needs of the X86 -backend, and is only shown as an example.</p> - -<p>As you can see, a lot of information is needed for every instruction -supported by the code generator, and specifying it all manually would be -unmaintainable, prone to bugs, and tiring to do in the first place. Because we -are using TableGen, all of the information was derived from the following -definition:</p> - -<div class="doc_code"> -<pre> -let Defs = [EFLAGS], - isCommutable = 1, <i>// X = ADD Y,Z --> X = ADD Z,Y</i> - isConvertibleToThreeAddress = 1 <b>in</b> <i>// Can transform into LEA.</i> -def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>; -</pre> -</div> - -<p>This definition makes use of the custom class <tt>I</tt> (extended from the -custom class <tt>X86Inst</tt>), which is defined in the X86-specific TableGen -file, to factor out the common features that instructions of its class share. A -key feature of TableGen is that it allows the end-user to define the -abstractions they prefer to use when describing their information.</p> - -<p>Each def record has a special entry called "NAME." This is the -name of the def ("ADD32rr" above). In the general case def names can -be formed from various kinds of string processing expressions and NAME -resolves to the final value obtained after resolving all of those -expressions. The user may refer to NAME anywhere she desires to use -the ultimate name of the def. 
NAME should not be defined anywhere -else in user code to avoid conflict problems.</p> - -</div> - -<!-- ======================================================================= --> -<h3><a name="running">Running TableGen</a></h3> - -<div> - -<p>TableGen runs just like any other LLVM tool. The first (optional) argument -specifies the file to read. If a filename is not specified, -<tt>llvm-tblgen</tt> reads from standard input.</p> - -<p>To be useful, one of the <a href="#backends">TableGen backends</a> must be -used. These backends are selectable on the command line (type '<tt>llvm-tblgen --help</tt>' for a list). For example, to get a list of all of the definitions -that subclass a particular type (which can be useful for building up an enum -list of these records), use the <tt>-print-enums</tt> option:</p> - -<div class="doc_code"> -<pre> -$ llvm-tblgen X86.td -print-enums -class=Register -AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX, -ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP, -MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D, -R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15, -R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI, -RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, -XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5, -XMM6, XMM7, XMM8, XMM9, - -$ llvm-tblgen X86.td -print-enums -class=Instruction -ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri, -ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8, -ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm, -ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr, -ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ... 
-</pre> -</div> - -<p>The default backend prints out all of the records, as described <a -href="#example">above</a>.</p> - -<p>If you plan to use TableGen, you will most likely have to <a -href="#backends">write a backend</a> that extracts the information specific to -what you need and formats it in the appropriate way.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="syntax">TableGen syntax</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>TableGen doesn't care about the meaning of data (that is up to the backend to -define), but it does care about syntax, and it enforces a simple type system. -This section describes the syntax and the constructs allowed in a TableGen file. -</p> - -<!-- ======================================================================= --> -<h3><a name="primitives">TableGen primitives</a></h3> - -<div> - -<!-- --------------------------------------------------------------------------> -<h4><a name="comments">TableGen comments</a></h4> - -<div> - -<p>TableGen supports BCPL style "<tt>//</tt>" comments, which run to the end of -the line, and it also supports <b>nestable</b> "<tt>/* */</tt>" comments.</p> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="types">The TableGen type system</a> -</h4> - -<div> - -<p>TableGen files are strongly typed, in a simple (but complete) type-system. -These types are used to perform automatic conversions, check for errors, and to -help interface designers constrain the input that they allow. Every <a -href="#valuedef">value definition</a> is required to have an associated type. -</p> - -<p>TableGen supports a mixture of very low-level types (such as <tt>bit</tt>) -and very high-level types (such as <tt>dag</tt>). This flexibility is what -allows it to describe a wide range of information conveniently and compactly. 
-The TableGen types are:</p> - -<dl> -<dt><tt><b>bit</b></tt></dt> - <dd>A 'bit' is a boolean value that can hold either 0 or 1.</dd> - -<dt><tt><b>int</b></tt></dt> - <dd>The 'int' type represents a simple 32-bit integer value, such as 5.</dd> - -<dt><tt><b>string</b></tt></dt> - <dd>The 'string' type represents an ordered sequence of characters of - arbitrary length.</dd> - -<dt><tt><b>bits</b><n></tt></dt> - <dd>A 'bits' type is an arbitrary, but fixed, size integer that is broken up - into individual bits. This type is useful because it can handle some bits - being defined while others are undefined.</dd> - -<dt><tt><b>list</b><ty></tt></dt> - <dd>This type represents a list whose elements are some other type. The - contained type is arbitrary: it can even be another list type.</dd> - -<dt>Class type</dt> - <dd>Specifying a class name in a type context means that the defined value - must be a subclass of the specified class. This is useful in conjunction with - the <b><tt>list</tt></b> type, for example, to constrain the elements of the - list to a common base class (e.g., a <tt><b>list</b><Register></tt> can - only contain definitions derived from the "<tt>Register</tt>" class).</dd> - -<dt><tt><b>dag</b></tt></dt> - <dd>This type represents a nestable directed graph of elements.</dd> - -<dt><tt><b>code</b></tt></dt> - <dd>This represents a big hunk of text. This is lexically distinct from - string values because it doesn't require escapeing double quotes and other - common characters that occur in code.</dd> -</dl> - -<p>To date, these types have been sufficient for describing things that -TableGen has been used for, but it is straight-forward to extend this list if -needed.</p> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="values">TableGen values and expressions</a> -</h4> - -<div> - -<p>TableGen allows for a pretty reasonable number of different expression forms -when building up values. 
These forms allow the TableGen file to be written in a -natural syntax and flavor for the application. The current expression forms -supported include:</p> - -<dl> -<dt><tt>?</tt></dt> - <dd>uninitialized field</dd> -<dt><tt>0b1001011</tt></dt> - <dd>binary integer value</dd> -<dt><tt>07654321</tt></dt> - <dd>octal integer value (indicated by a leading 0)</dd> -<dt><tt>7</tt></dt> - <dd>decimal integer value</dd> -<dt><tt>0x7F</tt></dt> - <dd>hexadecimal integer value</dd> -<dt><tt>"foo"</tt></dt> - <dd>string value</dd> -<dt><tt>[{ ... }]</tt></dt> - <dd>code fragment</dd> -<dt><tt>[ X, Y, Z ]<type></tt></dt> - <dd>list value. <type> is the type of the list -element and is usually optional. In rare cases, -TableGen is unable to deduce the element type in -which case the user must specify it explicitly.</dd> -<dt><tt>{ a, b, c }</tt></dt> - <dd>initializer for a "bits<3>" value</dd> -<dt><tt>value</tt></dt> - <dd>value reference</dd> -<dt><tt>value{17}</tt></dt> - <dd>access to one bit of a value</dd> -<dt><tt>value{15-17}</tt></dt> - <dd>access to multiple bits of a value</dd> -<dt><tt>DEF</tt></dt> - <dd>reference to a record definition</dd> -<dt><tt>CLASS<val list></tt></dt> - <dd>reference to a new anonymous definition of CLASS with the specified - template arguments.</dd> -<dt><tt>X.Y</tt></dt> - <dd>reference to the subfield of a value</dd> -<dt><tt>list[4-7,17,2-3]</tt></dt> - <dd>A slice of the 'list' list, including elements 4,5,6,7,17,2, and 3 from - it. Elements may be included multiple times.</dd> -<dt><tt>foreach <var> = [ <list> ] in { <body> }</tt></dt> -<dt><tt>foreach <var> = [ <list> ] in <def></tt></dt> - <dd> Replicate <body> or <def>, replacing instances of - <var> with each value in <list>. <var> is scoped at the - level of the <tt>foreach</tt> loop and must not conflict with any other object - introduced in <body> or <def>. Currently only <tt>def</tt>s are - expanded within <body>. 
- </dd> -<dt><tt>foreach <var> = 0-15 in ...</tt></dt> -<dt><tt>foreach <var> = {0-15,32-47} in ...</tt></dt> - <dd>Loop over ranges of integers. The braces are required for multiple - ranges.</dd> -<dt><tt>(DEF a, b)</tt></dt> - <dd>a dag value. The first element is required to be a record definition, the - remaining elements in the list may be arbitrary other values, including nested - `<tt>dag</tt>' values.</dd> -<dt><tt>!strconcat(a, b)</tt></dt> - <dd>A string value that is the result of concatenating the 'a' and 'b' - strings.</dd> -<dt><tt>str1#str2</tt></dt> - <dd>"#" (paste) is a shorthand for !strconcat. It may concatenate - things that are not quoted strings, in which case an implicit - !cast<string> is done on the operand of the paste.</dd> -<dt><tt>!cast<type>(a)</tt></dt> - <dd>A symbol of type <em>type</em> obtained by looking up the string 'a' in -the symbol table. If the type of 'a' does not match <em>type</em>, TableGen -aborts with an error. !cast<string> is a special case in that the argument must -be an object defined by a 'def' construct.</dd> -<dt><tt>!subst(a, b, c)</tt></dt> - <dd>If 'a' and 'b' are of string type or are symbol references, substitute -'b' for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.</dd> -<dt><tt>!foreach(a, b, c)</tt></dt> - <dd>For each member 'b' of dag or list 'a' apply operator 'c.' 'b' is a -dummy variable that should be declared as a member variable of an instantiated -class. This operation is analogous to $(foreach) in GNU make.</dd> -<dt><tt>!head(a)</tt></dt> - <dd>The first element of list 'a.'</dd> -<dt><tt>!tail(a)</tt></dt> - <dd>The 2nd-N elements of list 'a.'</dd> -<dt><tt>!empty(a)</tt></dt> - <dd>An integer {0,1} indicating whether list 'a' is empty.</dd> -<dt><tt>!if(a,b,c)</tt></dt> - <dd>'b' if the result of 'int' or 'bit' operator 'a' is nonzero, - 'c' otherwise.</dd> -<dt><tt>!eq(a,b)</tt></dt> - <dd>'bit 1' if string a is equal to string b, 0 otherwise. 
This - only operates on string, int and bit objects. Use !cast<string> to - compare other types of objects.</dd> -</dl> - -<p>Note that all of the values have rules specifying how they convert to values -for different types. These rules allow you to assign a value like "<tt>7</tt>" -to a "<tt>bits<4></tt>" value, for example.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="classesdefs">Classes and definitions</a> -</h3> - -<div> - -<p>As mentioned in the <a href="#concepts">intro</a>, classes and definitions -(collectively known as 'records') in TableGen are the main high-level unit of -information that TableGen collects. Records are defined with a <tt>def</tt> or -<tt>class</tt> keyword, the record name, and an optional list of "<a -href="#templateargs">template arguments</a>". If the record has superclasses, -they are specified as a comma separated list that starts with a colon character -("<tt>:</tt>"). If <a href="#valuedef">value definitions</a> or <a -href="#recordlet">let expressions</a> are needed for the class, they are -enclosed in curly braces ("<tt>{}</tt>"); otherwise, the record ends with a -semicolon.</p> - -<p>Here is a simple TableGen file:</p> - -<div class="doc_code"> -<pre> -<b>class</b> C { <b>bit</b> V = 1; } -<b>def</b> X : C; -<b>def</b> Y : C { - <b>string</b> Greeting = "hello"; -} -</pre> -</div> - -<p>This example defines two definitions, <tt>X</tt> and <tt>Y</tt>, both of -which derive from the <tt>C</tt> class. Because of this, they both get the -<tt>V</tt> bit value. The <tt>Y</tt> definition also gets the Greeting member -as well.</p> - -<p>In general, classes are useful for collecting together the commonality -between a group of records and isolating it in a single place. 
Also, classes -permit the specification of default values for their subclasses, allowing the -subclasses to override them as they wish.</p> - -<!----------------------------------------------------------------------------> -<h4> - <a name="valuedef">Value definitions</a> -</h4> - -<div> - -<p>Value definitions define named entries in records. A value must be defined -before it can be referred to as the operand for another value definition or -before the value is reset with a <a href="#recordlet">let expression</a>. A -value is defined by specifying a <a href="#types">TableGen type</a> and a name. -If an initial value is available, it may be specified after the type with an -equal sign. Value definitions require terminating semicolons.</p> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="recordlet">'let' expressions</a> -</h4> - -<div> - -<p>A record-level let expression is used to change the value of a value -definition in a record. This is primarily useful when a superclass defines a -value that a derived class or definition wants to override. Let expressions -consist of the '<tt>let</tt>' keyword followed by a value name, an equal sign -("<tt>=</tt>"), and a new value. For example, a new class could be added to the -example above, redefining the <tt>V</tt> field for all of its subclasses:</p> - -<div class="doc_code"> -<pre> -<b>class</b> D : C { let V = 0; } -<b>def</b> Z : D; -</pre> -</div> - -<p>In this case, the <tt>Z</tt> definition will have a zero value for its "V" -value, despite the fact that it derives (indirectly) from the <tt>C</tt> class, -because the <tt>D</tt> class overrode its value.</p> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="templateargs">Class template arguments</a> -</h4> - -<div> - -<p>TableGen permits the definition of parameterized classes as well as normal -concrete classes. 
Parameterized TableGen classes specify a list of variable -bindings (which may optionally have defaults) that are bound when used. Here is -a simple example:</p> - -<div class="doc_code"> -<pre> -<b>class</b> FPFormat<<b>bits</b><3> val> { - <b>bits</b><3> Value = val; -} -<b>def</b> NotFP : FPFormat<0>; -<b>def</b> ZeroArgFP : FPFormat<1>; -<b>def</b> OneArgFP : FPFormat<2>; -<b>def</b> OneArgFPRW : FPFormat<3>; -<b>def</b> TwoArgFP : FPFormat<4>; -<b>def</b> CompareFP : FPFormat<5>; -<b>def</b> CondMovFP : FPFormat<6>; -<b>def</b> SpecialFP : FPFormat<7>; -</pre> -</div> - -<p>In this case, template arguments are used as a space efficient way to specify -a list of "enumeration values", each with a "<tt>Value</tt>" field set to the -specified integer.</p> - -<p>The more esoteric forms of <a href="#values">TableGen expressions</a> are -useful in conjunction with template arguments. As an example:</p> - -<div class="doc_code"> -<pre> -<b>class</b> ModRefVal<<b>bits</b><2> val> { - <b>bits</b><2> Value = val; -} - -<b>def</b> None : ModRefVal<0>; -<b>def</b> Mod : ModRefVal<1>; -<b>def</b> Ref : ModRefVal<2>; -<b>def</b> ModRef : ModRefVal<3>; - -<b>class</b> Value<ModRefVal MR> { - <i>// Decode some information into a more convenient format, while providing - // a nice interface to the user of the "Value" class.</i> - <b>bit</b> isMod = MR.Value{0}; - <b>bit</b> isRef = MR.Value{1}; - - <i>// other stuff...</i> -} - -<i>// Example uses</i> -<b>def</b> bork : Value<Mod>; -<b>def</b> zork : Value<Ref>; -<b>def</b> hork : Value<ModRef>; -</pre> -</div> - -<p>This is obviously a contrived example, but it shows how template arguments -can be used to decouple the interface provided to the user of the class from the -actual internal data representation expected by the class. 
In this case, -running <tt>llvm-tblgen</tt> on the example prints the following -definitions:</p> - -<div class="doc_code"> -<pre> -<b>def</b> bork { <i>// Value</i> - <b>bit</b> isMod = 1; - <b>bit</b> isRef = 0; -} -<b>def</b> hork { <i>// Value</i> - <b>bit</b> isMod = 1; - <b>bit</b> isRef = 1; -} -<b>def</b> zork { <i>// Value</i> - <b>bit</b> isMod = 0; - <b>bit</b> isRef = 1; -} -</pre> -</div> - -<p> This shows that TableGen was able to dig into the argument and extract a -piece of information that was requested by the designer of the "Value" class. -For more realistic examples, please see existing users of TableGen, such as the -X86 backend.</p> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="multiclass">Multiclass definitions and instances</a> -</h4> - -<div> - -<p> -While classes with template arguments are a good way to factor commonality -between two instances of a definition, multiclasses allow a convenient notation -for defining multiple definitions at once (instances of implicitly constructed -classes). For example, consider an 3-address instruction set whose instructions -come in two forms: "<tt>reg = reg op reg</tt>" and "<tt>reg = reg op imm</tt>" -(e.g. SPARC). In this case, you'd like to specify in one place that this -commonality exists, then in a separate place indicate what all the ops are. 
-</p> - -<p> -Here is an example TableGen fragment that shows this idea: -</p> - -<div class="doc_code"> -<pre> -<b>def</b> ops; -<b>def</b> GPR; -<b>def</b> Imm; -<b>class</b> inst<<b>int</b> opc, <b>string</b> asmstr, <b>dag</b> operandlist>; - -<b>multiclass</b> ri_inst<<b>int</b> opc, <b>string</b> asmstr> { - def _rr : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"), - (ops GPR:$dst, GPR:$src1, GPR:$src2)>; - def _ri : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"), - (ops GPR:$dst, GPR:$src1, Imm:$src2)>; -} - -<i>// Instantiations of the ri_inst multiclass.</i> -<b>defm</b> ADD : ri_inst<0b111, "add">; -<b>defm</b> SUB : ri_inst<0b101, "sub">; -<b>defm</b> MUL : ri_inst<0b100, "mul">; -... -</pre> -</div> - -<p>The name of the resultant definitions has the multidef fragment names - appended to them, so this defines <tt>ADD_rr</tt>, <tt>ADD_ri</tt>, - <tt>SUB_rr</tt>, etc. A defm may inherit from multiple multiclasses, - instantiating definitions from each multiclass. Using a multiclass - this way is exactly equivalent to instantiating the classes multiple - times yourself, e.g. by writing:</p> - -<div class="doc_code"> -<pre> -<b>def</b> ops; -<b>def</b> GPR; -<b>def</b> Imm; -<b>class</b> inst<<b>int</b> opc, <b>string</b> asmstr, <b>dag</b> operandlist>; - -<b>class</b> rrinst<<b>int</b> opc, <b>string</b> asmstr> - : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"), - (ops GPR:$dst, GPR:$src1, GPR:$src2)>; - -<b>class</b> riinst<<b>int</b> opc, <b>string</b> asmstr> - : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"), - (ops GPR:$dst, GPR:$src1, Imm:$src2)>; - -<i>// Instantiations of the ri_inst multiclass.</i> -<b>def</b> ADD_rr : rrinst<0b111, "add">; -<b>def</b> ADD_ri : riinst<0b111, "add">; -<b>def</b> SUB_rr : rrinst<0b101, "sub">; -<b>def</b> SUB_ri : riinst<0b101, "sub">; -<b>def</b> MUL_rr : rrinst<0b100, "mul">; -<b>def</b> MUL_ri : riinst<0b100, "mul">; -... 
-</pre> -</div> - -<p> -A defm can also be used inside a multiclass providing several levels of -multiclass instanciations. -</p> - -<div class="doc_code"> -<pre> -<b>class</b> Instruction<bits<4> opc, string Name> { - bits<4> opcode = opc; - string name = Name; -} - -<b>multiclass</b> basic_r<bits<4> opc> { - <b>def</b> rr : Instruction<opc, "rr">; - <b>def</b> rm : Instruction<opc, "rm">; -} - -<b>multiclass</b> basic_s<bits<4> opc> { - <b>defm</b> SS : basic_r<opc>; - <b>defm</b> SD : basic_r<opc>; - <b>def</b> X : Instruction<opc, "x">; -} - -<b>multiclass</b> basic_p<bits<4> opc> { - <b>defm</b> PS : basic_r<opc>; - <b>defm</b> PD : basic_r<opc>; - <b>def</b> Y : Instruction<opc, "y">; -} - -<b>defm</b> ADD : basic_s<0xf>, basic_p<0xf>; -... - -<i>// Results</i> -<b>def</b> ADDPDrm { ... -<b>def</b> ADDPDrr { ... -<b>def</b> ADDPSrm { ... -<b>def</b> ADDPSrr { ... -<b>def</b> ADDSDrm { ... -<b>def</b> ADDSDrr { ... -<b>def</b> ADDY { ... -<b>def</b> ADDX { ... -</pre> -</div> - -<p> -defm declarations can inherit from classes too, the -rule to follow is that the class list must start after the -last multiclass, and there must be at least one multiclass -before them. -</p> - -<div class="doc_code"> -<pre> -<b>class</b> XD { bits<4> Prefix = 11; } -<b>class</b> XS { bits<4> Prefix = 12; } - -<b>class</b> I<bits<4> op> { - bits<4> opcode = op; -} - -<b>multiclass</b> R { - <b>def</b> rr : I<4>; - <b>def</b> rm : I<2>; -} - -<b>multiclass</b> Y { - <b>defm</b> SS : R, XD; - <b>defm</b> SD : R, XS; -} - -<b>defm</b> Instr : Y; - -<i>// Results</i> -<b>def</b> InstrSDrm { - bits<4> opcode = { 0, 0, 1, 0 }; - bits<4> Prefix = { 1, 1, 0, 0 }; -} -... 
-<b>def</b> InstrSSrr { - bits<4> opcode = { 0, 1, 0, 0 }; - bits<4> Prefix = { 1, 0, 1, 1 }; -} -</pre> -</div> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="filescope">File scope entities</a> -</h3> - -<div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="include">File inclusion</a> -</h4> - -<div> -<p>TableGen supports the '<tt>include</tt>' token, which textually substitutes -the specified file in place of the include directive. The filename should be -specified as a double quoted string immediately after the '<tt>include</tt>' -keyword. Example:</p> - -<div class="doc_code"> -<pre> -<b>include</b> "foo.td" -</pre> -</div> - -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="globallet">'let' expressions</a> -</h4> - -<div> - -<p>"Let" expressions at file scope are similar to <a href="#recordlet">"let" -expressions within a record</a>, except they can specify a value binding for -multiple records at a time, and may be useful in certain other cases. -File-scope let expressions are really just another way that TableGen allows the -end-user to factor out commonality from the records.</p> - -<p>File-scope "let" expressions take a comma-separated list of bindings to -apply, and one or more records to bind the values in. 
Here are some -examples:</p> - -<div class="doc_code"> -<pre> -<b>let</b> isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 <b>in</b> - <b>def</b> RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>; - -<b>let</b> isCall = 1 <b>in</b> - <i>// All calls clobber the non-callee saved registers...</i> - <b>let</b> Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, EFLAGS] <b>in</b> { - <b>def</b> CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops), - "call\t${dst:call}", []>; - <b>def</b> CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR32:$dst)]>; - <b>def</b> CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), - "call\t{*}$dst", []>; - } -</pre> -</div> - -<p>File-scope "let" expressions are often useful when a couple of definitions -need to be added to several records, and the records do not otherwise need to be -opened, as in the case with the <tt>CALL*</tt> instructions above.</p> - -<p>It's also possible to use "let" expressions inside multiclasses, providing -more ways to factor out commonality from the records, specially if using -several levels of multiclass instanciations. 
This also avoids the need of using -"let" expressions within subsequent records inside a multiclass.</p> - -<pre class="doc_code"> -<b>multiclass </b>basic_r<bits<4> opc> { - <b>let </b>Predicates = [HasSSE2] in { - <b>def </b>rr : Instruction<opc, "rr">; - <b>def </b>rm : Instruction<opc, "rm">; - } - <b>let </b>Predicates = [HasSSE3] in - <b>def </b>rx : Instruction<opc, "rx">; -} - -<b>multiclass </b>basic_ss<bits<4> opc> { - <b>let </b>IsDouble = 0 in - <b>defm </b>SS : basic_r<opc>; - - <b>let </b>IsDouble = 1 in - <b>defm </b>SD : basic_r<opc>; -} - -<b>defm </b>ADD : basic_ss<0xf>; -</pre> -</div> - -<!-- --------------------------------------------------------------------------> -<h4> - <a name="foreach">Looping</a> -</h4> - -<div> -<p>TableGen supports the '<tt>foreach</tt>' block, which textually replicates -the loop body, substituting iterator values for iterator references in the -body. Example:</p> - -<div class="doc_code"> -<pre> -<b>foreach</b> i = [0, 1, 2, 3] in { - <b>def</b> R#i : Register<...>; - <b>def</b> F#i : Register<...>; -} -</pre> -</div> - -<p>This will create objects <tt>R0</tt>, <tt>R1</tt>, <tt>R2</tt> and -<tt>R3</tt>. <tt>foreach</tt> blocks may be nested. If there is only -one item in the body the braces may be elided:</p> - -<div class="doc_code"> -<pre> -<b>foreach</b> i = [0, 1, 2, 3] in - <b>def</b> R#i : Register<...>; - -</pre> -</div> - -</div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2><a name="codegen">Code Generator backend info</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>Expressions used by code generator to describe instructions and isel -patterns:</p> - -<dl> -<dt><tt>(implicit a)</tt></dt> - <dd>an implicitly defined physical register. 
This tells the dag instruction - selection emitter the input pattern's extra definitions matches implicit - physical register definitions.</dd> -</dl> -</div> - -<!-- *********************************************************************** --> -<h2><a name="backends">TableGen backends</a></h2> -<!-- *********************************************************************** --> - -<div> - -<p>TODO: How they work, how to write one. This section should not contain -details about any particular backend, except maybe -print-enums as an example. -This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/TableGenFundamentals.rst b/docs/TableGenFundamentals.rst new file mode 100644 index 0000000000..bfb2618998 --- /dev/null +++ b/docs/TableGenFundamentals.rst @@ -0,0 +1,799 @@ +.. _tablegen: + +===================== +TableGen Fundamentals +===================== + +.. contents:: + :local: + +Introduction +============ + +TableGen's purpose is to help a human develop and maintain records of +domain-specific information. Because there may be a large number of these +records, it is specifically designed to allow writing flexible descriptions and +for common features of these records to be factored out. This reduces the +amount of duplication in the description, reduces the chance of error, and makes +it easier to structure domain specific information. 
+ +The core part of TableGen `parses a file`_, instantiates the declarations, and +hands the result off to a domain-specific `TableGen backend`_ for processing. +The current major user of TableGen is the `LLVM code +generator <CodeGenerator.html>`_. + +Note that if you work on TableGen much, and use emacs or vim, that you can find +an emacs "TableGen mode" and a vim language file in the ``llvm/utils/emacs`` and +``llvm/utils/vim`` directories of your LLVM distribution, respectively. + +.. _intro: + +Basic concepts +-------------- + +TableGen files consist of two key parts: 'classes' and 'definitions', both of +which are considered 'records'. + +**TableGen records** have a unique name, a list of values, and a list of +superclasses. The list of values is the main data that TableGen builds for each +record; it is this that holds the domain specific information for the +application. The interpretation of this data is left to a specific `TableGen +backend`_, but the structure and format rules are taken care of and are fixed by +TableGen. + +**TableGen definitions** are the concrete form of 'records'. These generally do +not have any undefined values, and are marked with the '``def``' keyword. + +**TableGen classes** are abstract records that are used to build and describe +other records. These 'classes' allow the end-user to build abstractions for +either the domain they are targeting (such as "Register", "RegisterClass", and +"Instruction" in the LLVM code generator) or for the implementor to help factor +out common properties of records (such as "FPInst", which is used to represent +floating point instructions in the X86 backend). TableGen keeps track of all of +the classes that are used to build up a definition, so the backend can find all +definitions of a particular class, such as "Instruction". + +**TableGen multiclasses** are groups of abstract records that are instantiated +all at once. Each instantiation can result in multiple TableGen definitions. 
+If a multiclass inherits from another multiclass, the definitions in the +sub-multiclass become part of the current multiclass, as if they were declared +in the current multiclass. + +.. _described above: + +An example record +----------------- + +With no other arguments, TableGen parses the specified file and prints out all +of the classes, then all of the definitions. This is a good way to see what the +various definitions expand to fully. Running this on the ``X86.td`` file prints +this (at the time of this writing): + +.. code-block:: llvm + + ... + def ADD32rr { // Instruction X86Inst I + string Namespace = "X86"; + dag OutOperandList = (outs GR32:$dst); + dag InOperandList = (ins GR32:$src1, GR32:$src2); + string AsmString = "add{l}\t{$src2, $dst|$dst, $src2}"; + list<dag> Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]; + list<Register> Uses = []; + list<Register> Defs = [EFLAGS]; + list<Predicate> Predicates = []; + int CodeSize = 3; + int AddedComplexity = 0; + bit isReturn = 0; + bit isBranch = 0; + bit isIndirectBranch = 0; + bit isBarrier = 0; + bit isCall = 0; + bit canFoldAsLoad = 0; + bit mayLoad = 0; + bit mayStore = 0; + bit isImplicitDef = 0; + bit isConvertibleToThreeAddress = 1; + bit isCommutable = 1; + bit isTerminator = 0; + bit isReMaterializable = 0; + bit isPredicable = 0; + bit hasDelaySlot = 0; + bit usesCustomInserter = 0; + bit hasCtrlDep = 0; + bit isNotDuplicable = 0; + bit hasSideEffects = 0; + bit neverHasSideEffects = 0; + InstrItinClass Itinerary = NoItinerary; + string Constraints = ""; + string DisableEncoding = ""; + bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 }; + Format Form = MRMDestReg; + bits<6> FormBits = { 0, 0, 0, 0, 1, 1 }; + ImmType ImmT = NoImm; + bits<3> ImmTypeBits = { 0, 0, 0 }; + bit hasOpSizePrefix = 0; + bit hasAdSizePrefix = 0; + bits<4> Prefix = { 0, 0, 0, 0 }; + bit hasREX_WPrefix = 0; + FPFormat FPForm = ?; + bits<3> FPFormBits = { 0, 0, 0 }; + } + ... 
+
+This definition corresponds to a 32-bit register-register add instruction in the
+X86. The string after the '``def``' string indicates the name of the
+record---"``ADD32rr``" in this case---and the comment at the end of the line
+indicates the superclasses of the definition. The body of the record contains
+all of the data that TableGen assembled for the record, indicating that the
+instruction is part of the "X86" namespace, the pattern indicating how the
+instruction should be emitted into the assembly file, that it is a two-address
+instruction, has a particular encoding, etc. The contents and semantics of the
+information in the record are specific to the needs of the X86 backend, and are
+only shown as an example.
+
+As you can see, a lot of information is needed for every instruction supported
+by the code generator, and specifying it all manually would be unmaintainable,
+prone to bugs, and tiring to do in the first place. Because we are using
+TableGen, all of the information was derived from the following definition:
+
+.. code-block:: llvm
+
+  let Defs = [EFLAGS],
+      isCommutable = 1,                  // X = ADD Y,Z --> X = ADD Z,Y
+      isConvertibleToThreeAddress = 1 in // Can transform into LEA.
+  def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst),
+                   (ins GR32:$src1, GR32:$src2),
+                   "add{l}\t{$src2, $dst|$dst, $src2}",
+                   [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
+
+This definition makes use of the custom class ``I`` (extended from the custom
+class ``X86Inst``), which is defined in the X86-specific TableGen file, to
+factor out the common features that instructions of its class share. A key
+feature of TableGen is that it allows the end-user to define the abstractions
+they prefer to use when describing their information.
+
+Each def record has a special entry called "``NAME``." This is the name of the
+def ("``ADD32rr``" above). 
In the general case def names can be formed from +various kinds of string processing expressions and ``NAME`` resolves to the +final value obtained after resolving all of those expressions. The user may +refer to ``NAME`` anywhere she desires to use the ultimate name of the def. +``NAME`` should not be defined anywhere else in user code to avoid conflict +problems. + +Running TableGen +---------------- + +TableGen runs just like any other LLVM tool. The first (optional) argument +specifies the file to read. If a filename is not specified, ``llvm-tblgen`` +reads from standard input. + +To be useful, one of the `TableGen backends`_ must be used. These backends are +selectable on the command line (type '``llvm-tblgen -help``' for a list). For +example, to get a list of all of the definitions that subclass a particular type +(which can be useful for building up an enum list of these records), use the +``-print-enums`` option: + +.. code-block:: bash + + $ llvm-tblgen X86.td -print-enums -class=Register + AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX, + ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D, + R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15, + R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI, + RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, + XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5, + XMM6, XMM7, XMM8, XMM9, + + $ llvm-tblgen X86.td -print-enums -class=Instruction + ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri, + ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8, + ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm, + ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr, + ADD64mi32, ADD64mi8, 
ADD64mr, ADD64ri32, ... + +The default backend prints out all of the records, as `described above`_. + +If you plan to use TableGen, you will most likely have to `write a backend`_ +that extracts the information specific to what you need and formats it in the +appropriate way. + +.. _parses a file: + +TableGen syntax +=============== + +TableGen doesn't care about the meaning of data (that is up to the backend to +define), but it does care about syntax, and it enforces a simple type system. +This section describes the syntax and the constructs allowed in a TableGen file. + +TableGen primitives +------------------- + +TableGen comments +^^^^^^^^^^^^^^^^^ + +TableGen supports BCPL style "``//``" comments, which run to the end of the +line, and it also supports **nestable** "``/* */``" comments. + +.. _TableGen type: + +The TableGen type system +^^^^^^^^^^^^^^^^^^^^^^^^ + +TableGen files are strongly typed, in a simple (but complete) type-system. +These types are used to perform automatic conversions, check for errors, and to +help interface designers constrain the input that they allow. Every `value +definition`_ is required to have an associated type. + +TableGen supports a mixture of very low-level types (such as ``bit``) and very +high-level types (such as ``dag``). This flexibility is what allows it to +describe a wide range of information conveniently and compactly. The TableGen +types are: + +``bit`` + A 'bit' is a boolean value that can hold either 0 or 1. + +``int`` + The 'int' type represents a simple 32-bit integer value, such as 5. + +``string`` + The 'string' type represents an ordered sequence of characters of arbitrary + length. + +``bits<n>`` + A 'bits' type is an arbitrary, but fixed, size integer that is broken up + into individual bits. This type is useful because it can handle some bits + being defined while others are undefined. + +``list<ty>`` + This type represents a list whose elements are some other type. 
The + contained type is arbitrary: it can even be another list type. + +Class type + Specifying a class name in a type context means that the defined value must + be a subclass of the specified class. This is useful in conjunction with + the ``list`` type, for example, to constrain the elements of the list to a + common base class (e.g., a ``list<Register>`` can only contain definitions + derived from the "``Register``" class). + +``dag`` + This type represents a nestable directed graph of elements. + +``code`` + This represents a big hunk of text. This is lexically distinct from string + values because it doesn't require escaping double quotes and other common + characters that occur in code. + +To date, these types have been sufficient for describing things that TableGen +has been used for, but it is straight-forward to extend this list if needed. + +.. _TableGen expressions: + +TableGen values and expressions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +TableGen allows for a pretty reasonable number of different expression forms +when building up values. These forms allow the TableGen file to be written in a +natural syntax and flavor for the application. The current expression forms +supported include: + +``?`` + uninitialized field + +``0b1001011`` + binary integer value + +``07654321`` + octal integer value (indicated by a leading 0) + +``7`` + decimal integer value + +``0x7F`` + hexadecimal integer value + +``"foo"`` + string value + +``[{ ... }]`` + code fragment + +``[ X, Y, Z ]<type>`` + list value. <type> is the type of the list element and is usually optional. + In rare cases, TableGen is unable to deduce the element type in which case + the user must specify it explicitly. 
+ +``{ a, b, c }`` + initializer for a "bits<3>" value + +``value`` + value reference + +``value{17}`` + access to one bit of a value + +``value{15-17}`` + access to multiple bits of a value + +``DEF`` + reference to a record definition + +``CLASS<val list>`` + reference to a new anonymous definition of CLASS with the specified template + arguments. + +``X.Y`` + reference to the subfield of a value + +``list[4-7,17,2-3]`` + A slice of the 'list' list, including elements 4,5,6,7,17,2, and 3 from it. + Elements may be included multiple times. + +``foreach <var> = [ <list> ] in { <body> }`` + +``foreach <var> = [ <list> ] in <def>`` + Replicate <body> or <def>, replacing instances of <var> with each value + in <list>. <var> is scoped at the level of the ``foreach`` loop and must + not conflict with any other object introduced in <body> or <def>. Currently + only ``def``\s are expanded within <body>. + +``foreach <var> = 0-15 in ...`` + +``foreach <var> = {0-15,32-47} in ...`` + Loop over ranges of integers. The braces are required for multiple ranges. + +``(DEF a, b)`` + a dag value. The first element is required to be a record definition, the + remaining elements in the list may be arbitrary other values, including + nested ```dag``' values. + +``!strconcat(a, b)`` + A string value that is the result of concatenating the 'a' and 'b' strings. + +``str1#str2`` + "#" (paste) is a shorthand for !strconcat. It may concatenate things that + are not quoted strings, in which case an implicit !cast<string> is done on + the operand of the paste. + +``!cast<type>(a)`` + A symbol of type *type* obtained by looking up the string 'a' in the symbol + table. If the type of 'a' does not match *type*, TableGen aborts with an + error. !cast<string> is a special case in that the argument must be an + object defined by a 'def' construct. + +``!subst(a, b, c)`` + If 'a' and 'b' are of string type or are symbol references, substitute 'b' + for 'a' in 'c.' 
This operation is analogous to $(subst) in GNU make. + +``!foreach(a, b, c)`` + For each member 'b' of dag or list 'a' apply operator 'c.' 'b' is a dummy + variable that should be declared as a member variable of an instantiated + class. This operation is analogous to $(foreach) in GNU make. + +``!head(a)`` + The first element of list 'a.' + +``!tail(a)`` + The 2nd-N elements of list 'a.' + +``!empty(a)`` + An integer {0,1} indicating whether list 'a' is empty. + +``!if(a,b,c)`` + 'b' if the result of 'int' or 'bit' operator 'a' is nonzero, 'c' otherwise. + +``!eq(a,b)`` + 'bit 1' if string a is equal to string b, 0 otherwise. This only operates + on string, int and bit objects. Use !cast<string> to compare other types of + objects. + +Note that all of the values have rules specifying how they convert to values +for different types. These rules allow you to assign a value like "``7``" +to a "``bits<4>``" value, for example. + +Classes and definitions +----------------------- + +As mentioned in the `intro`_, classes and definitions (collectively known as +'records') in TableGen are the main high-level unit of information that TableGen +collects. Records are defined with a ``def`` or ``class`` keyword, the record +name, and an optional list of "`template arguments`_". If the record has +superclasses, they are specified as a comma separated list that starts with a +colon character ("``:``"). If `value definitions`_ or `let expressions`_ are +needed for the class, they are enclosed in curly braces ("``{}``"); otherwise, +the record ends with a semicolon. + +Here is a simple TableGen file: + +.. code-block:: llvm + + class C { bit V = 1; } + def X : C; + def Y : C { + string Greeting = "hello"; + } + +This example defines two definitions, ``X`` and ``Y``, both of which derive from +the ``C`` class. Because of this, they both get the ``V`` bit value. The ``Y`` +definition also gets the Greeting member as well. 
+ +In general, classes are useful for collecting together the commonality between a +group of records and isolating it in a single place. Also, classes permit the +specification of default values for their subclasses, allowing the subclasses to +override them as they wish. + +.. _value definition: +.. _value definitions: + +Value definitions +^^^^^^^^^^^^^^^^^ + +Value definitions define named entries in records. A value must be defined +before it can be referred to as the operand for another value definition or +before the value is reset with a `let expression`_. A value is defined by +specifying a `TableGen type`_ and a name. If an initial value is available, it +may be specified after the type with an equal sign. Value definitions require +terminating semicolons. + +.. _let expression: +.. _let expressions: +.. _"let" expressions within a record: + +'let' expressions +^^^^^^^^^^^^^^^^^ + +A record-level let expression is used to change the value of a value definition +in a record. This is primarily useful when a superclass defines a value that a +derived class or definition wants to override. Let expressions consist of the +'``let``' keyword followed by a value name, an equal sign ("``=``"), and a new +value. For example, a new class could be added to the example above, redefining +the ``V`` field for all of its subclasses: + +.. code-block:: llvm + + class D : C { let V = 0; } + def Z : D; + +In this case, the ``Z`` definition will have a zero value for its ``V`` value, +despite the fact that it derives (indirectly) from the ``C`` class, because the +``D`` class overrode its value. + +.. _template arguments: + +Class template arguments +^^^^^^^^^^^^^^^^^^^^^^^^ + +TableGen permits the definition of parameterized classes as well as normal +concrete classes. Parameterized TableGen classes specify a list of variable +bindings (which may optionally have defaults) that are bound when used. Here is +a simple example: + +.. 
code-block:: llvm + + class FPFormat<bits<3> val> { + bits<3> Value = val; + } + def NotFP : FPFormat<0>; + def ZeroArgFP : FPFormat<1>; + def OneArgFP : FPFormat<2>; + def OneArgFPRW : FPFormat<3>; + def TwoArgFP : FPFormat<4>; + def CompareFP : FPFormat<5>; + def CondMovFP : FPFormat<6>; + def SpecialFP : FPFormat<7>; + +In this case, template arguments are used as a space efficient way to specify a +list of "enumeration values", each with a "``Value``" field set to the specified +integer. + +The more esoteric forms of `TableGen expressions`_ are useful in conjunction +with template arguments. As an example: + +.. code-block:: llvm + + class ModRefVal<bits<2> val> { + bits<2> Value = val; + } + + def None : ModRefVal<0>; + def Mod : ModRefVal<1>; + def Ref : ModRefVal<2>; + def ModRef : ModRefVal<3>; + + class Value<ModRefVal MR> { + // Decode some information into a more convenient format, while providing + // a nice interface to the user of the "Value" class. + bit isMod = MR.Value{0}; + bit isRef = MR.Value{1}; + + // other stuff... + } + + // Example uses + def bork : Value<Mod>; + def zork : Value<Ref>; + def hork : Value<ModRef>; + +This is obviously a contrived example, but it shows how template arguments can +be used to decouple the interface provided to the user of the class from the +actual internal data representation expected by the class. In this case, +running ``llvm-tblgen`` on the example prints the following definitions: + +.. code-block:: llvm + + def bork { // Value + bit isMod = 1; + bit isRef = 0; + } + def hork { // Value + bit isMod = 1; + bit isRef = 1; + } + def zork { // Value + bit isMod = 0; + bit isRef = 1; + } + +This shows that TableGen was able to dig into the argument and extract a piece +of information that was requested by the designer of the "Value" class. For +more realistic examples, please see existing users of TableGen, such as the X86 +backend. 
+
+Multiclass definitions and instances
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+While classes with template arguments are a good way to factor commonality
+between two instances of a definition, multiclasses allow a convenient notation
+for defining multiple definitions at once (instances of implicitly constructed
+classes). For example, consider a 3-address instruction set whose instructions
+come in two forms: "``reg = reg op reg``" and "``reg = reg op imm``"
+(e.g. SPARC). In this case, you'd like to specify in one place that this
+commonality exists, then in a separate place indicate what all the ops are.
+
+Here is an example TableGen fragment that shows this idea:
+
+.. code-block:: llvm
+
+ def ops;
+ def GPR;
+ def Imm;
+ class inst<int opc, string asmstr, dag operandlist>;
+
+ multiclass ri_inst<int opc, string asmstr> {
+ def _rr : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+ def _ri : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+ }
+
+ // Instantiations of the ri_inst multiclass.
+ defm ADD : ri_inst<0b111, "add">;
+ defm SUB : ri_inst<0b101, "sub">;
+ defm MUL : ri_inst<0b100, "mul">;
+ ...
+
+The name of the resultant definitions has the multidef fragment names appended
+to them, so this defines ``ADD_rr``, ``ADD_ri``, ``SUB_rr``, etc. A defm may
+inherit from multiple multiclasses, instantiating definitions from each
+multiclass. Using a multiclass this way is exactly equivalent to instantiating
+the classes multiple times yourself, e.g. by writing:
+
+.. 
code-block:: llvm
+
+ def ops;
+ def GPR;
+ def Imm;
+ class inst<int opc, string asmstr, dag operandlist>;
+
+ class rrinst<int opc, string asmstr>
+ : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+
+ class riinst<int opc, string asmstr>
+ : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+ (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+
+ // Instantiations of the ri_inst multiclass.
+ def ADD_rr : rrinst<0b111, "add">;
+ def ADD_ri : riinst<0b111, "add">;
+ def SUB_rr : rrinst<0b101, "sub">;
+ def SUB_ri : riinst<0b101, "sub">;
+ def MUL_rr : rrinst<0b100, "mul">;
+ def MUL_ri : riinst<0b100, "mul">;
+ ...
+
+A ``defm`` can also be used inside a multiclass providing several levels of
+multiclass instantiations.
+
+.. code-block:: llvm
+
+ class Instruction<bits<4> opc, string Name> {
+ bits<4> opcode = opc;
+ string name = Name;
+ }
+
+ multiclass basic_r<bits<4> opc> {
+ def rr : Instruction<opc, "rr">;
+ def rm : Instruction<opc, "rm">;
+ }
+
+ multiclass basic_s<bits<4> opc> {
+ defm SS : basic_r<opc>;
+ defm SD : basic_r<opc>;
+ def X : Instruction<opc, "x">;
+ }
+
+ multiclass basic_p<bits<4> opc> {
+ defm PS : basic_r<opc>;
+ defm PD : basic_r<opc>;
+ def Y : Instruction<opc, "y">;
+ }
+
+ defm ADD : basic_s<0xf>, basic_p<0xf>;
+ ...
+
+ // Results
+ def ADDPDrm { ...
+ def ADDPDrr { ...
+ def ADDPSrm { ...
+ def ADDPSrr { ...
+ def ADDSDrm { ...
+ def ADDSDrr { ...
+ def ADDY { ...
+ def ADDX { ...
+
+``defm`` declarations can inherit from classes too, the rule to follow is that
+the class list must start after the last multiclass, and there must be at least
+one multiclass before them.
+
+.. 
code-block:: llvm + + class XD { bits<4> Prefix = 11; } + class XS { bits<4> Prefix = 12; } + + class I<bits<4> op> { + bits<4> opcode = op; + } + + multiclass R { + def rr : I<4>; + def rm : I<2>; + } + + multiclass Y { + defm SS : R, XD; + defm SD : R, XS; + } + + defm Instr : Y; + + // Results + def InstrSDrm { + bits<4> opcode = { 0, 0, 1, 0 }; + bits<4> Prefix = { 1, 1, 0, 0 }; + } + ... + def InstrSSrr { + bits<4> opcode = { 0, 1, 0, 0 }; + bits<4> Prefix = { 1, 0, 1, 1 }; + } + +File scope entities +------------------- + +File inclusion +^^^^^^^^^^^^^^ + +TableGen supports the '``include``' token, which textually substitutes the +specified file in place of the include directive. The filename should be +specified as a double quoted string immediately after the '``include``' keyword. +Example: + +.. code-block:: llvm + + include "foo.td" + +'let' expressions +^^^^^^^^^^^^^^^^^ + +"Let" expressions at file scope are similar to `"let" expressions within a +record`_, except they can specify a value binding for multiple records at a +time, and may be useful in certain other cases. File-scope let expressions are +really just another way that TableGen allows the end-user to factor out +commonality from the records. + +File-scope "let" expressions take a comma-separated list of bindings to apply, +and one or more records to bind the values in. Here are some examples: + +.. code-block:: llvm + + let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in + def RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>; + + let isCall = 1 in + // All calls clobber the non-callee saved registers... 
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, EFLAGS] in {
+ def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
+ "call\t${dst:call}", []>;
+ def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+ "call\t{*}$dst", [(X86call GR32:$dst)]>;
+ def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+ "call\t{*}$dst", []>;
+ }
+
+File-scope "let" expressions are often useful when a couple of definitions need
+to be added to several records, and the records do not otherwise need to be
+opened, as in the case with the ``CALL*`` instructions above.
+
+It's also possible to use "let" expressions inside multiclasses, providing more
+ways to factor out commonality from the records, especially if using several
+levels of multiclass instantiations. This also avoids the need of using "let"
+expressions within subsequent records inside a multiclass.
+
+.. code-block:: llvm
+
+ multiclass basic_r<bits<4> opc> {
+ let Predicates = [HasSSE2] in {
+ def rr : Instruction<opc, "rr">;
+ def rm : Instruction<opc, "rm">;
+ }
+ let Predicates = [HasSSE3] in
+ def rx : Instruction<opc, "rx">;
+ }
+
+ multiclass basic_ss<bits<4> opc> {
+ let IsDouble = 0 in
+ defm SS : basic_r<opc>;
+
+ let IsDouble = 1 in
+ defm SD : basic_r<opc>;
+ }
+
+ defm ADD : basic_ss<0xf>;
+
+Looping
+^^^^^^^
+
+TableGen supports the '``foreach``' block, which textually replicates the loop
+body, substituting iterator values for iterator references in the body.
+Example:
+
+.. code-block:: llvm
+
+ foreach i = [0, 1, 2, 3] in {
+ def R#i : Register<...>;
+ def F#i : Register<...>;
+ }
+
+This will create objects ``R0``, ``R1``, ``R2`` and ``R3``. ``foreach`` blocks
+may be nested. If there is only one item in the body the braces may be
+elided:
+
+.. 
code-block:: llvm + + foreach i = [0, 1, 2, 3] in + def R#i : Register<...>; + +Code Generator backend info +=========================== + +Expressions used by code generator to describe instructions and isel patterns: + +``(implicit a)`` + an implicitly defined physical register. This tells the dag instruction + selection emitter the input pattern's extra definitions matches implicit + physical register definitions. + +.. _TableGen backend: +.. _TableGen backends: +.. _write a backend: + +TableGen backends +================= + +TODO: How they work, how to write one. This section should not contain details +about any particular backend, except maybe ``-print-enums`` as an example. This +should highlight the APIs in ``TableGen/Record.h``. diff --git a/docs/design_and_overview.rst b/docs/design_and_overview.rst index 158190ec85..ea684155e0 100644 --- a/docs/design_and_overview.rst +++ b/docs/design_and_overview.rst @@ -3,29 +3,34 @@ LLVM Design & Overview ====================== - * `LLVM Language Reference Manual <LangRef.html>`_ +.. toctree:: + :hidden: - Defines the LLVM intermediate representation. + GetElementPtr - * `Introduction to the LLVM Compiler <http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html>`_ +* `LLVM Language Reference Manual <LangRef.html>`_ - Presentation providing a users introduction to LLVM. + Defines the LLVM intermediate representation. - * `Intro to LLVM <http://www.aosabook.org/en/llvm.html>`_ +* `Introduction to the LLVM Compiler <http://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html>`_ - Book chapter providing a compiler hacker's introduction to LLVM. + Presentation providing a users introduction to LLVM. - * `LLVM: A Compilation Framework forLifelong Program Analysis & Transformation - <http://llvm.org/pubs/2004-01-30-CGO-LLVM.html>`_ +* `Intro to LLVM <http://www.aosabook.org/en/llvm.html>`_ - Design overview. + Book chapter providing a compiler hacker's introduction to LLVM. 
- * `LLVM: An Infrastructure for Multi-Stage Optimization
- <http://llvm.org/pubs/2002-12-LattnerMSThesis.html>`_
+* `LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation
+ <http://llvm.org/pubs/2004-01-30-CGO-LLVM.html>`_
- More details (quite old now).
+ Design overview.
- * `GetElementPtr FAQ <GetElementPtr.html>`_
+* `LLVM: An Infrastructure for Multi-Stage Optimization
+ <http://llvm.org/pubs/2002-12-LattnerMSThesis.html>`_
- Answers to some very frequent questions about LLVM's most frequently
- misunderstood instruction.
+ More details (quite old now).
+
+* :ref:`gep`
+
+ Answers to some very frequent questions about LLVM's most frequently
+ misunderstood instruction.
diff --git a/docs/development_process.rst b/docs/development_process.rst
index f73bbf4cd9..4fc20b3412 100644
--- a/docs/development_process.rst
+++ b/docs/development_process.rst
@@ -3,22 +3,28 @@
 Development Process Documentation
 =================================
- * `LLVM Project Guide <Projects.html>`_
+.. toctree::
+ :hidden:
- How-to guide and templates for new projects that *use* the LLVM
- infrastructure. The templates (directory organization, Makefiles, and test
- tree) allow the project code to be located outside (or inside) the ``llvm/``
- tree, while using LLVM header files and libraries.
+ MakefileGuide
+ Projects
- * `LLVMBuild Documentation <LLVMBuild.html>`_
+* :ref:`projects`
- Describes the LLVMBuild organization and files used by LLVM to specify
- component descriptions.
+ How-to guide and templates for new projects that *use* the LLVM
+ infrastructure. The templates (directory organization, Makefiles, and test
+ tree) allow the project code to be located outside (or inside) the ``llvm/``
+ tree, while using LLVM header files and libraries.
- * `LLVM Makefile Guide <MakefileGuide.html>`_
+* `LLVMBuild Documentation <LLVMBuild.html>`_
- Describes how the LLVM makefiles work and how to use them. 
+ Describes the LLVMBuild organization and files used by LLVM to specify + component descriptions. - * `How To Release LLVM To The Public <HowToReleaseLLVM.html>`_ +* :ref:`makefile_guide` - This is a guide to preparing LLVM releases. Most developers can ignore it. + Describes how the LLVM makefiles work and how to use them. + +* `How To Release LLVM To The Public <HowToReleaseLLVM.html>`_ + + This is a guide to preparing LLVM releases. Most developers can ignore it. diff --git a/docs/llvm-theme/static/llvm-theme.css b/docs/llvm-theme/static/llvm-theme.css index da4f648061..f684d00ce4 100644 --- a/docs/llvm-theme/static/llvm-theme.css +++ b/docs/llvm-theme/static/llvm-theme.css @@ -140,7 +140,7 @@ div.footer a { /* -- body styles ----------------------------------------------------------- */ -p { +p { margin: 0.8em 0 0.5em 0; } @@ -153,7 +153,7 @@ a:hover { color: #2491CF; } -div.body a { +div.body p a{ text-decoration: underline; } @@ -175,10 +175,35 @@ h3 { font-size: 1.2em; } +h3 a:hover { + text-decoration: underline; +} + div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { color: black!important; } +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + background-color: #f2f2f2; + font-weight: normal; + color: #20435c; + border-bottom: 1px solid #ccc; + margin: 20px -20px 10px -20px; + padding: 3px 0 3px 10px; +} + +div.body h1 { margin-top: 0; font-size: 200%; } +div.body h2 { font-size: 160%; } +div.body h3 { font-size: 140%; } +div.body h4 { font-size: 120%; } +div.body h5 { font-size: 110%; } +div.body h6 { font-size: 100%; } + h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { display: none; margin: 0 0 0 0.3em; @@ -217,7 +242,7 @@ cite, code, tt { letter-spacing: 0.01em; } -tt { +:not(a.reference) > tt { background-color: #f2f2f2; border-bottom: 1px solid #ddd; color: #333; @@ -232,15 +257,19 @@ hr { margin: 2em; } -a tt { +p a tt { border: 0; color: 
#CA7900; } -a tt:hover { +p a tt:hover { color: #2491CF; } +a tt { + border: none; +} + pre { font-family: 'Consolas', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; diff --git a/docs/programming.rst b/docs/programming.rst index b198d97cd1..add923f899 100644 --- a/docs/programming.rst +++ b/docs/programming.rst @@ -3,32 +3,37 @@ Programming Documentation ========================= - * `LLVM Language Reference Manual <LangRef.html>`_ +.. toctree:: + :hidden: - Defines the LLVM intermediate representation and the assembly form of the - different nodes. + CodingStandards - * `The LLVM Programmers Manual <ProgrammersManual.html>`_ +* `LLVM Language Reference Manual <LangRef.html>`_ - Introduction to the general layout of the LLVM sourcebase, important classes - and APIs, and some tips & tricks. + Defines the LLVM intermediate representation and the assembly form of the + different nodes. - * `CommandLine library Reference Manual <CommandLine.html>`_ +* `The LLVM Programmers Manual <ProgrammersManual.html>`_ - Provides information on using the command line parsing library. + Introduction to the general layout of the LLVM sourcebase, important classes + and APIs, and some tips & tricks. - * `LLVM Coding standards <CodingStandards.html>`_ +* `CommandLine library Reference Manual <CommandLine.html>`_ - Details the LLVM coding standards and provides useful information on writing - efficient C++ code. + Provides information on using the command line parsing library. - * `Extending LLVM <ExtendingLLVM.html>`_ +* :ref:`coding_standards` - Look here to see how to add instructions and intrinsics to LLVM. + Details the LLVM coding standards and provides useful information on writing + efficient C++ code. 
- * `Doxygen generated documentation <http://llvm.org/doxygen/>`_ +* `Extending LLVM <ExtendingLLVM.html>`_ - (`classes <http://llvm.org/doxygen/inherits.html>`_) - (`tarball <http://llvm.org/doxygen/doxygen.tar.gz>`_) + Look here to see how to add instructions and intrinsics to LLVM. - * `ViewVC Repository Browser <http://llvm.org/viewvc/>`_ +* `Doxygen generated documentation <http://llvm.org/doxygen/>`_ + + (`classes <http://llvm.org/doxygen/inherits.html>`_) + (`tarball <http://llvm.org/doxygen/doxygen.tar.gz>`_) + +* `ViewVC Repository Browser <http://llvm.org/viewvc/>`_ diff --git a/docs/subsystems.rst b/docs/subsystems.rst index 3a0db7878d..c4c3b6d595 100644 --- a/docs/subsystems.rst +++ b/docs/subsystems.rst @@ -3,72 +3,88 @@ Subsystem Documentation ======================= - * `Writing an LLVM Pass <WritingAnLLVMPass.html>`_ - - Information on how to write LLVM transformations and analyses. - - * `Writing an LLVM Backend <WritingAnLLVMBackend.html>`_ +.. toctree:: + :hidden: + + AliasAnalysis + BitCodeFormat + BranchWeightMetadata + Bugpoint + ExceptionHandling + LinkTimeOptimization + SegmentedStacks + TableGenFundamentals + +* `Writing an LLVM Pass <WritingAnLLVMPass.html>`_ - Information on how to write LLVM backends for machine targets. + Information on how to write LLVM transformations and analyses. - * `The LLVM Target-Independent Code Generator <CodeGenerator.html>`_ +* `Writing an LLVM Backend <WritingAnLLVMBackend.html>`_ - The design and implementation of the LLVM code generator. Useful if you are - working on retargetting LLVM to a new architecture, designing a new codegen - pass, or enhancing existing components. + Information on how to write LLVM backends for machine targets. - * `TableGen Fundamentals <TableGenFundamentals.html>`_ +* `The LLVM Target-Independent Code Generator <CodeGenerator.html>`_ - Describes the TableGen tool, which is used heavily by the LLVM code - generator. + The design and implementation of the LLVM code generator. 
Useful if you are + working on retargetting LLVM to a new architecture, designing a new codegen + pass, or enhancing existing components. - * `Alias Analysis in LLVM <AliasAnalysis.html>`_ +* :ref:`tablegen` + + Describes the TableGen tool, which is used heavily by the LLVM code + generator. - Information on how to write a new alias analysis implementation or how to - use existing analyses. +* :ref:`alias_analysis` - * `Accurate Garbage Collection with LLVM <GarbageCollection.html>`_ + Information on how to write a new alias analysis implementation or how to + use existing analyses. - The interfaces source-language compilers should use for compiling GC'd - programs. +* `Accurate Garbage Collection with LLVM <GarbageCollection.html>`_ - * `Source Level Debugging with LLVM <SourceLevelDebugging.html>`_ + The interfaces source-language compilers should use for compiling GC'd + programs. + +* `Source Level Debugging with LLVM <SourceLevelDebugging.html>`_ - This document describes the design and philosophy behind the LLVM - source-level debugger. + This document describes the design and philosophy behind the LLVM + source-level debugger. - * `Zero Cost Exception handling in LLVM <ExceptionHandling.html>`_ +* :ref:`exception_handling` - This document describes the design and implementation of exception handling - in LLVM. + This document describes the design and implementation of exception handling + in LLVM. - * `Bugpoint <Bugpoint.html>`_ +* :ref:`bugpoint` - Automatic bug finder and test-case reducer description and usage - information. + Automatic bug finder and test-case reducer description and usage + information. - * `LLVM Bitcode File Format <BitCodeFormat.html>`_ +* :ref:`bitcode_format` - This describes the file format and encoding used for LLVM "bc" files. + This describes the file format and encoding used for LLVM "bc" files. 
- * `System Library <SystemLibrary.html>`_ +* `System Library <SystemLibrary.html>`_ - This document describes the LLVM System Library (<tt>lib/System</tt>) and - how to keep LLVM source code portable + This document describes the LLVM System Library (<tt>lib/System</tt>) and + how to keep LLVM source code portable - * `Link Time Optimization <LinkTimeOptimization.html>`_ +* :ref:`lto` - This document describes the interface between LLVM intermodular optimizer - and the linker and its design + This document describes the interface between LLVM intermodular optimizer + and the linker and its design - * `The LLVM gold plugin <GoldPlugin.html>`_ +* `The LLVM gold plugin <GoldPlugin.html>`_ - How to build your programs with link-time optimization on Linux. + How to build your programs with link-time optimization on Linux. - * `The GDB JIT interface <DebuggingJITedCode.html>`_ +* `The GDB JIT interface <DebuggingJITedCode.html>`_ - How to debug JITed code with GDB. + How to debug JITed code with GDB. - * `Branch Weight Metadata <BranchWeightMetadata.html>`_ +* :ref:`branch_weight` - Provides information about Branch Prediction Information. + Provides information about Branch Prediction Information. + +* :ref:`segmented_stacks` + + This document describes segmented stacks and how they are used in LLVM. diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html index 4e9ac3ad5f..57ff7373f6 100644 --- a/docs/tutorial/LangImpl3.html +++ b/docs/tutorial/LangImpl3.html @@ -685,10 +685,10 @@ clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy // See example below. 
#include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Support/IRBuilder.h" #include <cstdio> #include <string> #include <map> diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html index 8c5c31e762..453e43a02e 100644 --- a/docs/tutorial/LangImpl4.html +++ b/docs/tutorial/LangImpl4.html @@ -517,6 +517,7 @@ at runtime.</p> #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -524,7 +525,6 @@ at runtime.</p> #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html index 0bb7e4f711..2d406df3aa 100644 --- a/docs/tutorial/LangImpl5.html +++ b/docs/tutorial/LangImpl5.html @@ -895,6 +895,7 @@ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -902,7 +903,6 @@ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html index 453b576dd3..9c606ae11d 100644 --- a/docs/tutorial/LangImpl6.html +++ b/docs/tutorial/LangImpl6.html @@ -834,6 +834,7 @@ library, although doing 
that will cause problems on Windows.</p> #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -841,7 +842,6 @@ library, although doing that will cause problems on Windows.</p> #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html index 90bdeee16a..08c0c716b6 100644 --- a/docs/tutorial/LangImpl7.html +++ b/docs/tutorial/LangImpl7.html @@ -1002,6 +1002,7 @@ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -1009,7 +1010,6 @@ clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/docs/userguides.rst b/docs/userguides.rst index eb70028e7b..1b44c48fe9 100644 --- a/docs/userguides.rst +++ b/docs/userguides.rst @@ -7,79 +7,82 @@ User Guides :hidden: CommandGuide/index + DeveloperPolicy + GettingStartedVS + FAQ + Lexicon + Packaging -\ - - * `The LLVM Getting Started Guide <GettingStarted.html>`_ +* `The LLVM Getting Started Guide <GettingStarted.html>`_ - Discusses how to get up and running quickly with the LLVM infrastructure. - Everything from unpacking and compilation of the distribution to execution - of some tools. 
+ Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. - * `LLVM CMake guide <CMake.html>`_ +* `LLVM CMake guide <CMake.html>`_ - An addendum to the main Getting Started guide for those using the `CMake - build system <http://www.cmake.org>`_. + An addendum to the main Getting Started guide for those using the `CMake + build system <http://www.cmake.org>`_. - * `Getting Started with the LLVM System using Microsoft Visual Studio - <GettingStartedVS.html>`_ +* `Getting Started with the LLVM System using Microsoft Visual Studio + <GettingStartedVS.html>`_ - An addendum to the main Getting Started guide for those using Visual Studio - on Windows. + An addendum to the main Getting Started guide for those using Visual Studio + on Windows. - * `LLVM Tutorial <tutorial/>`_ +* `LLVM Tutorial <tutorial/>`_ - A walk through the process of using LLVM for a custom language, and the - facilities LLVM offers in tutorial form. + A walk through the process of using LLVM for a custom language, and the + facilities LLVM offers in tutorial form. - * `Developer Policy <DeveloperPolicy.html>`_ +* :ref:`developer_policy` - The LLVM project's policy towards developers and their contributions. + The LLVM project's policy towards developers and their contributions. - * :ref:`LLVM Command Guide <commands>` +* :ref:`LLVM Command Guide <commands>` - A reference manual for the LLVM command line utilities ("man" pages for LLVM - tools). + A reference manual for the LLVM command line utilities ("man" pages for LLVM + tools). - * `LLVM's Analysis and Transform Passes <Passes.html>`_ +* `LLVM's Analysis and Transform Passes <Passes.html>`_ - A list of optimizations and analyses implemented in LLVM. + A list of optimizations and analyses implemented in LLVM. - * `Frequently Asked Questions <FAQ.html>`_ +* :ref:`faq` - A list of common questions and problems and their solutions. 
+ A list of common questions and problems and their solutions. - * `Release notes for the current release <ReleaseNotes.html>`_ +* `Release notes for the current release <ReleaseNotes.html>`_ - This describes new features, known bugs, and other limitations. + This describes new features, known bugs, and other limitations. - * `How to Submit A Bug Report <HowToSubmitABug.html>`_ +* `How to Submit A Bug Report <HowToSubmitABug.html>`_ - Instructions for properly submitting information about any bugs you run into - in the LLVM system. + Instructions for properly submitting information about any bugs you run into + in the LLVM system. - * `LLVM Testing Infrastructure Guide <TestingGuide.html>`_ +* `LLVM Testing Infrastructure Guide <TestingGuide.html>`_ - A reference manual for using the LLVM testing infrastructure. + A reference manual for using the LLVM testing infrastructure. - * `How to build the C, C++, ObjC, and ObjC++ front end <http://clang.llvm.org/get_started.html>`_ +* `How to build the C, C++, ObjC, and ObjC++ front end <http://clang.llvm.org/get_started.html>`_ - Instructions for building the clang front-end from source. + Instructions for building the clang front-end from source. - * `Packaging guide <Packaging.html>`_ +* :ref:`packaging` - Advice on packaging LLVM into a distribution. + Advice on packaging LLVM into a distribution. - * `The LLVM Lexicon <Lexicon.html>`_ +* :ref:`lexicon` - Definition of acronyms, terms and concepts used in LLVM. + Definition of acronyms, terms and concepts used in LLVM. - * `How To Add Your Build Configuration To LLVM Buildbot Infrastructure <HowToAddABuilder.html>`_ +* `How To Add Your Build Configuration To LLVM Buildbot Infrastructure <HowToAddABuilder.html>`_ - Instructions for adding new builder to LLVM buildbot master. + Instructions for adding new builder to LLVM buildbot master. - * **IRC** -- You can probably find help on the unofficial LLVM IRC. 
+* **IRC** -- You can probably find help on the unofficial LLVM IRC. - We often are on irc.oftc.net in the #llvm channel. If you are using the - mozilla browser, and have chatzilla installed, you can `join #llvm on - irc.oftc.net <irc://irc.oftc.net/llvm>`_. + We often are on irc.oftc.net in the #llvm channel. If you are using the + mozilla browser, and have chatzilla installed, you can `join #llvm on + irc.oftc.net <irc://irc.oftc.net/llvm>`_. diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h index add0687d54..c069feb51e 100644 --- a/examples/BrainF/BrainF.h +++ b/examples/BrainF/BrainF.h @@ -15,9 +15,9 @@ #ifndef BRAINF_H #define BRAINF_H +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 0702baeb97..6dbd6626de 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -52,6 +52,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/Intrinsics.h" @@ -59,7 +60,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/TargetSelect.h" diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index a7d1ca8ff6..cfd9b1e33c 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -40,7 +40,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { // Create the fib function and insert it into module M. This function is said // to return an int and take an int parameter. 
Function *FibF = - cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), + cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), Type::getInt32Ty(Context), (Type *)0)); @@ -94,7 +94,7 @@ int main(int argc, char **argv) { InitializeNativeTarget(); LLVMContext Context; - + // Create some module to put our function into it. OwningPtr<Module> M(new Module("test", Context)); @@ -132,6 +132,6 @@ int main(int argc, char **argv) { // import result of execution outs() << "Result: " << GV.IntVal << "\n"; - + return 0; } diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 92b2860eec..5588e923df 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -34,18 +34,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/ExecutionEngine/JIT.h" #include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp index 33980f5ba8..c1e34b2f09 100644 --- a/examples/Kaleidoscope/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -1,8 +1,8 @@ #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Support/IRBuilder.h" #include <cstdio> #include <string> #include <map> diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index 
9a283229b9..cce4466ed5 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,6 +1,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -8,7 +9,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index adfbad5074..36dd760e5f 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -1,6 +1,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -8,7 +9,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index c16d6bdb5b..db3495dcc9 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1,6 +1,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -8,7 +9,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include 
<string> diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index 87b28c3056..143b30bf47 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1,6 +1,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/PassManager.h" @@ -8,7 +9,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetSelect.h" #include <cstdio> #include <string> diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 17f036d613..23df52cabe 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -21,9 +21,9 @@ /* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' and 'unwrap' conversion functions. */ +#include "llvm/IRBuilder.h" #include "llvm/Module.h" #include "llvm/PassRegistry.h" -#include "llvm/Support/IRBuilder.h" extern "C" { #endif diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 07a17535fc..783c0b4d9d 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -15,6 +15,7 @@ #define LLVM_ADT_DENSEMAP_H #include "llvm/Support/Compiler.h" +#include "llvm/Support/AlignOf.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/type_traits.h" @@ -24,6 +25,7 @@ #include <new> #include <utility> #include <cassert> +#include <climits> #include <cstddef> #include <cstring> @@ -34,112 +36,83 @@ template<typename KeyT, typename ValueT, bool IsConst = false> class DenseMapIterator; -template<typename KeyT, typename ValueT, - typename KeyInfoT = DenseMapInfo<KeyT> > -class DenseMap { +template<typename DerivedT, + typename KeyT, typename ValueT, typename KeyInfoT> +class DenseMapBase { 
+protected: typedef std::pair<KeyT, ValueT> BucketT; - unsigned NumBuckets; - BucketT *Buckets; - unsigned NumEntries; - unsigned NumTombstones; public: typedef KeyT key_type; typedef ValueT mapped_type; typedef BucketT value_type; - DenseMap(const DenseMap &other) { - NumBuckets = 0; - CopyFrom(other); - } - -#if LLVM_USE_RVALUE_REFERENCES - DenseMap(DenseMap &&other) { - init(0); - swap(other); - } -#endif - - explicit DenseMap(unsigned NumInitBuckets = 0) { - init(NumInitBuckets); - } - - template<typename InputIt> - DenseMap(const InputIt &I, const InputIt &E) { - init(NextPowerOf2(std::distance(I, E))); - insert(I, E); - } - - ~DenseMap() { - DestroyAll(); - } - typedef DenseMapIterator<KeyT, ValueT, KeyInfoT> iterator; typedef DenseMapIterator<KeyT, ValueT, KeyInfoT, true> const_iterator; inline iterator begin() { // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets(). - return empty() ? end() : iterator(Buckets, Buckets+NumBuckets); + return empty() ? end() : iterator(getBuckets(), getBucketsEnd()); } inline iterator end() { - return iterator(Buckets+NumBuckets, Buckets+NumBuckets, true); + return iterator(getBucketsEnd(), getBucketsEnd(), true); } inline const_iterator begin() const { - return empty() ? end() : const_iterator(Buckets, Buckets+NumBuckets); + return empty() ? end() : const_iterator(getBuckets(), getBucketsEnd()); } inline const_iterator end() const { - return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets, true); + return const_iterator(getBucketsEnd(), getBucketsEnd(), true); } - bool empty() const { return NumEntries == 0; } - unsigned size() const { return NumEntries; } + bool empty() const { return getNumEntries() == 0; } + unsigned size() const { return getNumEntries(); } /// Grow the densemap so that it has at least Size buckets. 
Does not shrink void resize(size_t Size) { - if (Size > NumBuckets) + if (Size > getNumBuckets()) grow(Size); } void clear() { - if (NumEntries == 0 && NumTombstones == 0) return; + if (getNumEntries() == 0 && getNumTombstones() == 0) return; // If the capacity of the array is huge, and the # elements used is small, // shrink the array. - if (NumEntries * 4 < NumBuckets && NumBuckets > 64) { + if (getNumEntries() * 4 < getNumBuckets() && getNumBuckets() > 64) { shrink_and_clear(); return; } const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); - for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) { + for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { if (!KeyInfoT::isEqual(P->first, EmptyKey)) { if (!KeyInfoT::isEqual(P->first, TombstoneKey)) { P->second.~ValueT(); - --NumEntries; + decrementNumEntries(); } P->first = EmptyKey; } } - assert(NumEntries == 0 && "Node count imbalance!"); - NumTombstones = 0; + assert(getNumEntries() == 0 && "Node count imbalance!"); + setNumTombstones(0); } /// count - Return true if the specified key is in the map. 
bool count(const KeyT &Val) const { - BucketT *TheBucket; + const BucketT *TheBucket; return LookupBucketFor(Val, TheBucket); } iterator find(const KeyT &Val) { BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return iterator(TheBucket, Buckets+NumBuckets, true); + return iterator(TheBucket, getBucketsEnd(), true); return end(); } const_iterator find(const KeyT &Val) const { - BucketT *TheBucket; + const BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return const_iterator(TheBucket, Buckets+NumBuckets, true); + return const_iterator(TheBucket, getBucketsEnd(), true); return end(); } @@ -152,21 +125,21 @@ public: iterator find_as(const LookupKeyT &Val) { BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return iterator(TheBucket, Buckets+NumBuckets, true); + return iterator(TheBucket, getBucketsEnd(), true); return end(); } template<class LookupKeyT> const_iterator find_as(const LookupKeyT &Val) const { - BucketT *TheBucket; + const BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) - return const_iterator(TheBucket, Buckets+NumBuckets, true); + return const_iterator(TheBucket, getBucketsEnd(), true); return end(); } /// lookup - Return the entry for the specified key, or a default /// constructed value if no such entry exists. ValueT lookup(const KeyT &Val) const { - BucketT *TheBucket; + const BucketT *TheBucket; if (LookupBucketFor(Val, TheBucket)) return TheBucket->second; return ValueT(); @@ -178,12 +151,12 @@ public: std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) { BucketT *TheBucket; if (LookupBucketFor(KV.first, TheBucket)) - return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true), + return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), false); // Already in map. // Otherwise, insert the new element. 
TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket); - return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true), true); + return std::make_pair(iterator(TheBucket, getBucketsEnd(), true), true); } /// insert - Range insertion of pairs. @@ -201,23 +174,16 @@ public: TheBucket->second.~ValueT(); TheBucket->first = getTombstoneKey(); - --NumEntries; - ++NumTombstones; + decrementNumEntries(); + incrementNumTombstones(); return true; } void erase(iterator I) { BucketT *TheBucket = &*I; TheBucket->second.~ValueT(); TheBucket->first = getTombstoneKey(); - --NumEntries; - ++NumTombstones; - } - - void swap(DenseMap& RHS) { - std::swap(NumBuckets, RHS.NumBuckets); - std::swap(Buckets, RHS.Buckets); - std::swap(NumEntries, RHS.NumEntries); - std::swap(NumTombstones, RHS.NumTombstones); + decrementNumEntries(); + incrementNumTombstones(); } value_type& FindAndConstruct(const KeyT &Key) { @@ -246,39 +212,27 @@ public: } #endif - DenseMap& operator=(const DenseMap& other) { - CopyFrom(other); - return *this; - } - -#if LLVM_USE_RVALUE_REFERENCES - DenseMap& operator=(DenseMap &&other) { - DestroyAll(); - init(0); - swap(other); - return *this; - } -#endif - /// isPointerIntoBucketsArray - Return true if the specified pointer points /// somewhere into the DenseMap's array of buckets (i.e. either to a key or /// value in the DenseMap). bool isPointerIntoBucketsArray(const void *Ptr) const { - return Ptr >= Buckets && Ptr < Buckets+NumBuckets; + return Ptr >= getBuckets() && Ptr < getBucketsEnd(); } /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets /// array. In conjunction with the previous method, this can be used to /// determine whether an insertion caused the DenseMap to reallocate. - const void *getPointerIntoBucketsArray() const { return Buckets; } + const void *getPointerIntoBucketsArray() const { return getBuckets(); } -private: - void DestroyAll() { - if (NumBuckets == 0) // Nothing to do. 
+protected: + DenseMapBase() {} + + void destroyAll() { + if (getNumBuckets() == 0) // Nothing to do. return; const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); - for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) { + for (BucketT *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) { if (!KeyInfoT::isEqual(P->first, EmptyKey) && !KeyInfoT::isEqual(P->first, TombstoneKey)) P->second.~ValueT(); @@ -286,36 +240,140 @@ private: } #ifndef NDEBUG - memset((void*)Buckets, 0x5a, sizeof(BucketT)*NumBuckets); + memset((void*)getBuckets(), 0x5a, sizeof(BucketT)*getNumBuckets()); #endif - operator delete(Buckets); } - void CopyFrom(const DenseMap& other) { - DestroyAll(); + void initEmpty() { + setNumEntries(0); + setNumTombstones(0); - NumEntries = other.NumEntries; - NumTombstones = other.NumTombstones; - NumBuckets = other.NumBuckets; + assert((getNumBuckets() & (getNumBuckets()-1)) == 0 && + "# initial buckets must be a power of two!"); + const KeyT EmptyKey = getEmptyKey(); + for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B) + new (&B->first) KeyT(EmptyKey); + } - if (NumBuckets == 0) { - Buckets = 0; - return; + void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) { + initEmpty(); + + // Insert all the old elements. + const KeyT EmptyKey = getEmptyKey(); + const KeyT TombstoneKey = getTombstoneKey(); + for (BucketT *B = OldBucketsBegin, *E = OldBucketsEnd; B != E; ++B) { + if (!KeyInfoT::isEqual(B->first, EmptyKey) && + !KeyInfoT::isEqual(B->first, TombstoneKey)) { + // Insert the key/value into the new table. + BucketT *DestBucket; + bool FoundVal = LookupBucketFor(B->first, DestBucket); + (void)FoundVal; // silence warning. + assert(!FoundVal && "Key already in new map?"); + DestBucket->first = llvm_move(B->first); + new (&DestBucket->second) ValueT(llvm_move(B->second)); + incrementNumEntries(); + + // Free the value. 
+ B->second.~ValueT(); + } + B->first.~KeyT(); } - Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) * NumBuckets)); +#ifndef NDEBUG + if (OldBucketsBegin != OldBucketsEnd) + memset((void*)OldBucketsBegin, 0x5a, + sizeof(BucketT) * (OldBucketsEnd - OldBucketsBegin)); +#endif + } + + template <typename OtherBaseT> + void copyFrom(const DenseMapBase<OtherBaseT, KeyT, ValueT, KeyInfoT>& other) { + assert(getNumBuckets() == other.getNumBuckets()); + + setNumEntries(other.getNumEntries()); + setNumTombstones(other.getNumTombstones()); if (isPodLike<KeyT>::value && isPodLike<ValueT>::value) - memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT)); + memcpy(getBuckets(), other.getBuckets(), + getNumBuckets() * sizeof(BucketT)); else - for (size_t i = 0; i < NumBuckets; ++i) { - new (&Buckets[i].first) KeyT(other.Buckets[i].first); - if (!KeyInfoT::isEqual(Buckets[i].first, getEmptyKey()) && - !KeyInfoT::isEqual(Buckets[i].first, getTombstoneKey())) - new (&Buckets[i].second) ValueT(other.Buckets[i].second); + for (size_t i = 0; i < getNumBuckets(); ++i) { + new (&getBuckets()[i].first) KeyT(other.getBuckets()[i].first); + if (!KeyInfoT::isEqual(getBuckets()[i].first, getEmptyKey()) && + !KeyInfoT::isEqual(getBuckets()[i].first, getTombstoneKey())) + new (&getBuckets()[i].second) ValueT(other.getBuckets()[i].second); } } + void swap(DenseMapBase& RHS) { + std::swap(getNumEntries(), RHS.getNumEntries()); + std::swap(getNumTombstones(), RHS.getNumTombstones()); + } + + static unsigned getHashValue(const KeyT &Val) { + return KeyInfoT::getHashValue(Val); + } + template<typename LookupKeyT> + static unsigned getHashValue(const LookupKeyT &Val) { + return KeyInfoT::getHashValue(Val); + } + static const KeyT getEmptyKey() { + return KeyInfoT::getEmptyKey(); + } + static const KeyT getTombstoneKey() { + return KeyInfoT::getTombstoneKey(); + } + +private: + unsigned getNumEntries() const { + return static_cast<const DerivedT *>(this)->getNumEntries(); + } + void 
setNumEntries(unsigned Num) { + static_cast<DerivedT *>(this)->setNumEntries(Num); + } + void incrementNumEntries() { + setNumEntries(getNumEntries() + 1); + } + void decrementNumEntries() { + setNumEntries(getNumEntries() - 1); + } + unsigned getNumTombstones() const { + return static_cast<const DerivedT *>(this)->getNumTombstones(); + } + void setNumTombstones(unsigned Num) { + static_cast<DerivedT *>(this)->setNumTombstones(Num); + } + void incrementNumTombstones() { + setNumTombstones(getNumTombstones() + 1); + } + void decrementNumTombstones() { + setNumTombstones(getNumTombstones() - 1); + } + const BucketT *getBuckets() const { + return static_cast<const DerivedT *>(this)->getBuckets(); + } + BucketT *getBuckets() { + return static_cast<DerivedT *>(this)->getBuckets(); + } + unsigned getNumBuckets() const { + return static_cast<const DerivedT *>(this)->getNumBuckets(); + } + BucketT *getBucketsEnd() { + return getBuckets() + getNumBuckets(); + } + const BucketT *getBucketsEnd() const { + return getBuckets() + getNumBuckets(); + } + + void grow(unsigned AtLeast) { + static_cast<DerivedT *>(this)->grow(AtLeast); + } + + void shrink_and_clear() { + static_cast<DerivedT *>(this)->shrink_and_clear(); + } + + BucketT *InsertIntoBucket(const KeyT &Key, const ValueT &Value, BucketT *TheBucket) { TheBucket = InsertIntoBucketImpl(Key, TheBucket); @@ -354,54 +412,47 @@ private: // probe almost the entire table until it found the empty bucket. If the // table completely filled with tombstones, no lookup would ever succeed, // causing infinite loops in lookup. 
- ++NumEntries; - if (NumEntries*4 >= NumBuckets*3) { + unsigned NewNumEntries = getNumEntries() + 1; + unsigned NumBuckets = getNumBuckets(); + if (NewNumEntries*4 >= NumBuckets*3) { this->grow(NumBuckets * 2); LookupBucketFor(Key, TheBucket); + NumBuckets = getNumBuckets(); } - if (NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) { + if (NumBuckets-(NewNumEntries+getNumTombstones()) <= NumBuckets/8) { this->grow(NumBuckets); LookupBucketFor(Key, TheBucket); } + // Only update the state after we've grown our bucket space appropriately + // so that when growing buckets we have self-consistent entry count. + incrementNumEntries(); + // If we are writing over a tombstone, remember this. if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey())) - --NumTombstones; + decrementNumTombstones(); return TheBucket; } - static unsigned getHashValue(const KeyT &Val) { - return KeyInfoT::getHashValue(Val); - } - template<typename LookupKeyT> - static unsigned getHashValue(const LookupKeyT &Val) { - return KeyInfoT::getHashValue(Val); - } - static const KeyT getEmptyKey() { - return KeyInfoT::getEmptyKey(); - } - static const KeyT getTombstoneKey() { - return KeyInfoT::getTombstoneKey(); - } - /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in /// FoundBucket. If the bucket contains the key and a value, this returns /// true, otherwise it returns a bucket with an empty marker or tombstone and /// returns false. template<typename LookupKeyT> - bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) const { + bool LookupBucketFor(const LookupKeyT &Val, + const BucketT *&FoundBucket) const { unsigned BucketNo = getHashValue(Val); unsigned ProbeAmt = 1; - BucketT *BucketsPtr = Buckets; + const BucketT *BucketsPtr = getBuckets(); - if (NumBuckets == 0) { + if (getNumBuckets() == 0) { FoundBucket = 0; return false; } // FoundTombstone - Keep track of whether we find a tombstone while probing. 
- BucketT *FoundTombstone = 0; + const BucketT *FoundTombstone = 0; const KeyT EmptyKey = getEmptyKey(); const KeyT TombstoneKey = getTombstoneKey(); assert(!KeyInfoT::isEqual(Val, EmptyKey) && @@ -409,7 +460,7 @@ private: "Empty/Tombstone value shouldn't be inserted into map!"); while (1) { - BucketT *ThisBucket = BucketsPtr + (BucketNo & (NumBuckets-1)); + const BucketT *ThisBucket = BucketsPtr + (BucketNo & (getNumBuckets()-1)); // Found Val's bucket? If so, return it. if (KeyInfoT::isEqual(Val, ThisBucket->first)) { FoundBucket = ThisBucket; @@ -437,112 +488,476 @@ private: } } - void init(unsigned InitBuckets) { - NumEntries = 0; - NumTombstones = 0; - NumBuckets = InitBuckets; + template <typename LookupKeyT> + bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) { + const BucketT *ConstFoundBucket; + bool Result = const_cast<const DenseMapBase *>(this) + ->LookupBucketFor(Val, ConstFoundBucket); + FoundBucket = const_cast<BucketT *>(ConstFoundBucket); + return Result; + } - if (InitBuckets == 0) { - Buckets = 0; - return; +public: + /// Return the approximate size (in bytes) of the actual map. + /// This is just the raw memory used by DenseMap. + /// If entries are pointers to objects, the size of the referenced objects + /// are not included. + size_t getMemorySize() const { + return getNumBuckets() * sizeof(BucketT); + } +}; + +template<typename KeyT, typename ValueT, + typename KeyInfoT = DenseMapInfo<KeyT> > +class DenseMap + : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT>, + KeyT, ValueT, KeyInfoT> { + // Lift some types from the dependent base class into this class for + // simplicity of referring to them. 
+ typedef DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT> BaseT; + typedef typename BaseT::BucketT BucketT; + friend class DenseMapBase<DenseMap, KeyT, ValueT, KeyInfoT>; + + BucketT *Buckets; + unsigned NumEntries; + unsigned NumTombstones; + unsigned NumBuckets; + +public: + explicit DenseMap(unsigned NumInitBuckets = 0) { + init(NumInitBuckets); + } + + DenseMap(const DenseMap &other) { + init(0); + copyFrom(other); + } + +#if LLVM_USE_RVALUE_REFERENCES + DenseMap(DenseMap &&other) { + init(0); + swap(other); + } +#endif + + template<typename InputIt> + DenseMap(const InputIt &I, const InputIt &E) { + init(NextPowerOf2(std::distance(I, E))); + this->insert(I, E); + } + + ~DenseMap() { + this->destroyAll(); + operator delete(Buckets); + } + + void swap(DenseMap& RHS) { + std::swap(Buckets, RHS.Buckets); + std::swap(NumEntries, RHS.NumEntries); + std::swap(NumTombstones, RHS.NumTombstones); + std::swap(NumBuckets, RHS.NumBuckets); + } + + DenseMap& operator=(const DenseMap& other) { + copyFrom(other); + return *this; + } + +#if LLVM_USE_RVALUE_REFERENCES + DenseMap& operator=(DenseMap &&other) { + this->destroyAll(); + operator delete(Buckets); + init(0); + swap(other); + return *this; + } +#endif + + void copyFrom(const DenseMap& other) { + this->destroyAll(); + operator delete(Buckets); + if (allocateBuckets(other.NumBuckets)) { + this->BaseT::copyFrom(other); + } else { + NumEntries = 0; + NumTombstones = 0; } + } - assert(InitBuckets && (InitBuckets & (InitBuckets-1)) == 0 && - "# initial buckets must be a power of two!"); - Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT)*InitBuckets)); - // Initialize all the keys to EmptyKey. 
- const KeyT EmptyKey = getEmptyKey(); - for (unsigned i = 0; i != InitBuckets; ++i) - new (&Buckets[i].first) KeyT(EmptyKey); + void init(unsigned InitBuckets) { + if (allocateBuckets(InitBuckets)) { + this->BaseT::initEmpty(); + } else { + NumEntries = 0; + NumTombstones = 0; + } } void grow(unsigned AtLeast) { unsigned OldNumBuckets = NumBuckets; BucketT *OldBuckets = Buckets; - if (NumBuckets < 64) - NumBuckets = 64; + allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast))); + assert(Buckets); + if (!OldBuckets) { + this->BaseT::initEmpty(); + return; + } - // Double the number of buckets. - while (NumBuckets < AtLeast) - NumBuckets <<= 1; - NumTombstones = 0; - Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT)*NumBuckets)); + this->moveFromOldBuckets(OldBuckets, OldBuckets+OldNumBuckets); - // Initialize all the keys to EmptyKey. - const KeyT EmptyKey = getEmptyKey(); - for (unsigned i = 0, e = NumBuckets; i != e; ++i) - new (&Buckets[i].first) KeyT(EmptyKey); + // Free the old table. + operator delete(OldBuckets); + } - // Insert all the old elements. - const KeyT TombstoneKey = getTombstoneKey(); - for (BucketT *B = OldBuckets, *E = OldBuckets+OldNumBuckets; B != E; ++B) { - if (!KeyInfoT::isEqual(B->first, EmptyKey) && - !KeyInfoT::isEqual(B->first, TombstoneKey)) { - // Insert the key/value into the new table. - BucketT *DestBucket; - bool FoundVal = LookupBucketFor(B->first, DestBucket); - (void)FoundVal; // silence warning. - assert(!FoundVal && "Key already in new map?"); - DestBucket->first = llvm_move(B->first); - new (&DestBucket->second) ValueT(llvm_move(B->second)); + void shrink_and_clear() { + unsigned OldNumEntries = NumEntries; + this->destroyAll(); - // Free the value. - B->second.~ValueT(); - } - B->first.~KeyT(); + // Reduce the number of buckets. 
+ unsigned NewNumBuckets + = std::max(64, 1 << (Log2_32_Ceil(OldNumEntries) + 1)); + if (NewNumBuckets == NumBuckets) { + this->BaseT::initEmpty(); + return; } -#ifndef NDEBUG - if (OldNumBuckets) - memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); + operator delete(Buckets); + init(NewNumBuckets); + } + +private: + unsigned getNumEntries() const { + return NumEntries; + } + void setNumEntries(unsigned Num) { + NumEntries = Num; + } + + unsigned getNumTombstones() const { + return NumTombstones; + } + void setNumTombstones(unsigned Num) { + NumTombstones = Num; + } + + BucketT *getBuckets() const { + return Buckets; + } + + unsigned getNumBuckets() const { + return NumBuckets; + } + + bool allocateBuckets(unsigned Num) { + NumBuckets = Num; + if (NumBuckets == 0) { + Buckets = 0; + return false; + } + + Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) * NumBuckets)); + return true; + } +}; + +template<typename KeyT, typename ValueT, + unsigned InlineBuckets = 4, + typename KeyInfoT = DenseMapInfo<KeyT> > +class SmallDenseMap + : public DenseMapBase<SmallDenseMap<KeyT, ValueT, InlineBuckets, KeyInfoT>, + KeyT, ValueT, KeyInfoT> { + // Lift some types from the dependent base class into this class for + // simplicity of referring to them. + typedef DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT> BaseT; + typedef typename BaseT::BucketT BucketT; + friend class DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT>; + + unsigned Small : 1; + unsigned NumEntries : 31; + unsigned NumTombstones; + + struct LargeRep { + BucketT *Buckets; + unsigned NumBuckets; + }; + + /// A "union" of an inline bucket array and the struct representing + /// a large bucket. This union will be discriminated by the 'Small' bit. 
+ typename AlignedCharArray<BucketT[InlineBuckets], LargeRep>::union_type + storage; + +public: + explicit SmallDenseMap(unsigned NumInitBuckets = 0) { + init(NumInitBuckets); + } + + SmallDenseMap(const SmallDenseMap &other) { + init(0); + copyFrom(other); + } + +#if LLVM_USE_RVALUE_REFERENCES + SmallDenseMap(SmallDenseMap &&other) { + init(0); + swap(other); + } #endif - // Free the old table. - operator delete(OldBuckets); + + template<typename InputIt> + SmallDenseMap(const InputIt &I, const InputIt &E) { + init(NextPowerOf2(std::distance(I, E))); + this->insert(I, E); } - void shrink_and_clear() { - unsigned OldNumBuckets = NumBuckets; - BucketT *OldBuckets = Buckets; + ~SmallDenseMap() { + this->destroyAll(); + deallocateBuckets(); + } - // Reduce the number of buckets. - NumBuckets = NumEntries > 32 ? 1 << (Log2_32_Ceil(NumEntries) + 1) - : 64; - NumTombstones = 0; - Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT)*NumBuckets)); + void swap(SmallDenseMap& RHS) { + unsigned TmpNumEntries = RHS.NumEntries; + RHS.NumEntries = NumEntries; + NumEntries = TmpNumEntries; + std::swap(NumTombstones, RHS.NumTombstones); - // Initialize all the keys to EmptyKey. - const KeyT EmptyKey = getEmptyKey(); - for (unsigned i = 0, e = NumBuckets; i != e; ++i) - new (&Buckets[i].first) KeyT(EmptyKey); + const KeyT EmptyKey = this->getEmptyKey(); + const KeyT TombstoneKey = this->getTombstoneKey(); + if (Small && RHS.Small) { + // If we're swapping inline bucket arrays, we have to cope with some of + // the tricky bits of DenseMap's storage system: the buckets are not + // fully initialized. Thus we swap every key, but we may have + // a one-directional move of the value. 
+ for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { + BucketT *LHSB = &getInlineBuckets()[i], + *RHSB = &RHS.getInlineBuckets()[i]; + bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->first, EmptyKey) && + !KeyInfoT::isEqual(LHSB->first, TombstoneKey)); + bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->first, EmptyKey) && + !KeyInfoT::isEqual(RHSB->first, TombstoneKey)); + if (hasLHSValue && hasRHSValue) { + // Swap together if we can... + std::swap(*LHSB, *RHSB); + continue; + } + // Swap separately and handle any assymetry. + std::swap(LHSB->first, RHSB->first); + if (hasLHSValue) { + new (&RHSB->second) ValueT(llvm_move(LHSB->second)); + LHSB->second.~ValueT(); + } else if (hasRHSValue) { + new (&LHSB->second) ValueT(llvm_move(RHSB->second)); + RHSB->second.~ValueT(); + } + } + return; + } + if (!Small && !RHS.Small) { + std::swap(getLargeRep()->Buckets, RHS.getLargeRep()->Buckets); + std::swap(getLargeRep()->NumBuckets, RHS.getLargeRep()->NumBuckets); + return; + } - // Free the old buckets. - const KeyT TombstoneKey = getTombstoneKey(); - for (BucketT *B = OldBuckets, *E = OldBuckets+OldNumBuckets; B != E; ++B) { - if (!KeyInfoT::isEqual(B->first, EmptyKey) && - !KeyInfoT::isEqual(B->first, TombstoneKey)) { - // Free the value. - B->second.~ValueT(); + SmallDenseMap &SmallSide = Small ? *this : RHS; + SmallDenseMap &LargeSide = Small ? RHS : *this; + + // First stash the large side's rep and move the small side across. + LargeRep TmpRep = llvm_move(*LargeSide.getLargeRep()); + LargeSide.getLargeRep()->~LargeRep(); + LargeSide.Small = true; + // This is similar to the standard move-from-old-buckets, but the bucket + // count hasn't actually rotated in this case. So we have to carefully + // move construct the keys and values into their new locations, but there + // is no need to re-hash things. 
+ for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { + BucketT *NewB = &LargeSide.getInlineBuckets()[i], + *OldB = &SmallSide.getInlineBuckets()[i]; + new (&NewB->first) KeyT(llvm_move(OldB->first)); + OldB->first.~KeyT(); + if (!KeyInfoT::isEqual(NewB->first, EmptyKey) && + !KeyInfoT::isEqual(NewB->first, TombstoneKey)) { + new (&NewB->second) ValueT(llvm_move(OldB->second)); + OldB->second.~ValueT(); } - B->first.~KeyT(); } -#ifndef NDEBUG - memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); + // The hard part of moving the small buckets across is done, just move + // the TmpRep into its new home. + SmallSide.Small = false; + new (SmallSide.getLargeRep()) LargeRep(llvm_move(TmpRep)); + } + + SmallDenseMap& operator=(const SmallDenseMap& other) { + copyFrom(other); + return *this; + } + +#if LLVM_USE_RVALUE_REFERENCES + SmallDenseMap& operator=(SmallDenseMap &&other) { + this->destroyAll(); + deallocateBuckets(); + init(0); + swap(other); + return *this; + } #endif + + void copyFrom(const SmallDenseMap& other) { + this->destroyAll(); + deallocateBuckets(); + Small = true; + if (other.getNumBuckets() > InlineBuckets) { + Small = false; + allocateBuckets(other.getNumBuckets()); + } + this->BaseT::copyFrom(other); + } + + void init(unsigned InitBuckets) { + Small = true; + if (InitBuckets > InlineBuckets) { + Small = false; + new (getLargeRep()) LargeRep(allocateBuckets(InitBuckets)); + } + this->BaseT::initEmpty(); + } + + void grow(unsigned AtLeast) { + if (AtLeast > InlineBuckets) + AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast)); + + if (Small) { + if (AtLeast <= InlineBuckets) + return; // Nothing to do. + + // First move the inline buckets into a temporary storage. + typename AlignedCharArray<BucketT[InlineBuckets]>::union_type + TmpStorage; + BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer); + BucketT *TmpEnd = TmpBegin; + + // Loop over the buckets, moving non-empty, non-tombstones into the + // temporary storage. 
Have the loop move the TmpEnd forward as it goes. + const KeyT EmptyKey = this->getEmptyKey(); + const KeyT TombstoneKey = this->getTombstoneKey(); + for (BucketT *P = getBuckets(), *E = P + InlineBuckets; P != E; ++P) { + if (!KeyInfoT::isEqual(P->first, EmptyKey) && + !KeyInfoT::isEqual(P->first, TombstoneKey)) { + assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && + "Too many inline buckets!"); + new (&TmpEnd->first) KeyT(llvm_move(P->first)); + new (&TmpEnd->second) ValueT(llvm_move(P->second)); + ++TmpEnd; + P->second.~ValueT(); + } + P->first.~KeyT(); + } + + // Now make this map use the large rep, and move all the entries back + // into it. + Small = false; + new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); + this->moveFromOldBuckets(TmpBegin, TmpEnd); + return; + } + + LargeRep OldRep = llvm_move(*getLargeRep()); + getLargeRep()->~LargeRep(); + if (AtLeast <= InlineBuckets) { + Small = true; + } else { + new (getLargeRep()) LargeRep(allocateBuckets(AtLeast)); + } + + this->moveFromOldBuckets(OldRep.Buckets, OldRep.Buckets+OldRep.NumBuckets); + // Free the old table. - operator delete(OldBuckets); + operator delete(OldRep.Buckets); + } - NumEntries = 0; + void shrink_and_clear() { + unsigned OldSize = this->size(); + this->destroyAll(); + + // Reduce the number of buckets. + unsigned NewNumBuckets = 0; + if (OldSize) { + NewNumBuckets = 1 << (Log2_32_Ceil(OldSize) + 1); + if (NewNumBuckets > InlineBuckets && NewNumBuckets < 64u) + NewNumBuckets = 64; + } + if ((Small && NewNumBuckets <= InlineBuckets) || + (!Small && NewNumBuckets == getLargeRep()->NumBuckets)) { + this->BaseT::initEmpty(); + return; + } + + deallocateBuckets(); + init(NewNumBuckets); } - -public: - /// Return the approximate size (in bytes) of the actual map. - /// This is just the raw memory used by DenseMap. - /// If entries are pointers to objects, the size of the referenced objects - /// are not included. 
- size_t getMemorySize() const { - return NumBuckets * sizeof(BucketT); + +private: + unsigned getNumEntries() const { + return NumEntries; + } + void setNumEntries(unsigned Num) { + assert(Num < INT_MAX && "Cannot support more than INT_MAX entries"); + NumEntries = Num; + } + + unsigned getNumTombstones() const { + return NumTombstones; + } + void setNumTombstones(unsigned Num) { + NumTombstones = Num; + } + + const BucketT *getInlineBuckets() const { + assert(Small); + // Note that this cast does not violate aliasing rules as we assert that + // the memory's dynamic type is the small, inline bucket buffer, and the + // 'storage.buffer' static type is 'char *'. + return reinterpret_cast<const BucketT *>(storage.buffer); + } + BucketT *getInlineBuckets() { + return const_cast<BucketT *>( + const_cast<const SmallDenseMap *>(this)->getInlineBuckets()); + } + const LargeRep *getLargeRep() const { + assert(!Small); + // Note, same rule about aliasing as with getInlineBuckets. + return reinterpret_cast<const LargeRep *>(storage.buffer); + } + LargeRep *getLargeRep() { + return const_cast<LargeRep *>( + const_cast<const SmallDenseMap *>(this)->getLargeRep()); + } + + const BucketT *getBuckets() const { + return Small ? getInlineBuckets() : getLargeRep()->Buckets; + } + BucketT *getBuckets() { + return const_cast<BucketT *>( + const_cast<const SmallDenseMap *>(this)->getBuckets()); + } + unsigned getNumBuckets() const { + return Small ? 
InlineBuckets : getLargeRep()->NumBuckets; + } + + void deallocateBuckets() { + if (Small) + return; + + operator delete(getLargeRep()->Buckets); + getLargeRep()->~LargeRep(); + } + + LargeRep allocateBuckets(unsigned Num) { + assert(Num > InlineBuckets && "Must allocate more buckets than are inline"); + LargeRep Rep = { + static_cast<BucketT*>(operator new(sizeof(BucketT) * Num)), Num + }; + return Rep; } }; diff --git a/include/llvm/ADT/FlatArrayMap.h b/include/llvm/ADT/FlatArrayMap.h deleted file mode 100644 index b414cde7a1..0000000000 --- a/include/llvm/ADT/FlatArrayMap.h +++ /dev/null @@ -1,323 +0,0 @@ -//===- llvm/ADT/FlatArrayMap.h - 'Normally small' pointer set ----*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the FlatArrayMap class. -// See FlatArrayMap doxygen comments for more details. -// -//===----------------------------------------------------------------------===// - -#ifndef FLATARRAYMAP_H_ -#define FLATARRAYMAP_H_ - -#include <algorithm> -#include <utility> -#include "llvm/Support/type_traits.h" - -namespace llvm { - - template <typename KeyTy, typename MappedTy> - struct FlatArrayMapTypes { - typedef KeyTy key_type; - typedef MappedTy mapped_type; - typedef typename std::pair<key_type, mapped_type> value_type; - }; - - template<typename KeyTy, typename MappedTy, bool IsConst = false> - class FlatArrayMapIterator; - - //===--------------------------------------------------------------------===// - /// FlatArrayMap presents map container interface. - /// It uses flat array implementation inside: - /// [ <key0, value0>, <key1, value1>, ... <keyN, valueN> ] - /// It works fast for small amount of elements. - /// User should pass key type, mapped type (type of value), and maximum - /// number of elements. 
- /// After maximum number of elements is reached, map declines any farther - /// attempts to insert new elements ("insert" method returns <end(),false>). - /// - template <typename KeyTy, typename MappedTy, unsigned MaxArraySize> - class FlatArrayMap { - public: - typedef FlatArrayMapTypes<KeyTy, MappedTy> Types; - - typedef typename Types::key_type key_type; - typedef typename Types::mapped_type mapped_type; - typedef typename Types::value_type value_type; - - typedef FlatArrayMapIterator<KeyTy, MappedTy> iterator; - typedef FlatArrayMapIterator<KeyTy, MappedTy, true> const_iterator; - - typedef FlatArrayMap<KeyTy, MappedTy, MaxArraySize> self; - - private: - - enum { BadIndex = -1U }; - - key_type EmptyKey; - mapped_type EmptyValue; - - value_type Array[MaxArraySize + 1]; - unsigned NumElements; - - unsigned findFor(const KeyTy Ptr) const { - // Linear search for the item. - for (const value_type *APtr = Array, *E = Array + NumElements; - APtr != E; ++APtr) { - if (APtr->first == Ptr) { - return APtr - Array; - } - } - return BadIndex; - } - - bool lookupFor(const KeyTy &Ptr, const value_type*& Found) const { - unsigned FoundIdx = findFor(Ptr); - if (FoundIdx != BadIndex) { - Found = Array + FoundIdx; - return true; - } - return false; - } - - bool lookupFor(const KeyTy &Ptr, value_type*& Found) { - unsigned FoundIdx = findFor(Ptr); - if (FoundIdx != BadIndex) { - Found = Array + FoundIdx; - return true; - } - return false; - } - - - void copyFrom(const self &RHS) { - memcpy(Array, RHS.Array, sizeof(value_type) * (MaxArraySize + 1)); - NumElements = RHS.NumElements; - } - - void init () { - memset(Array + MaxArraySize, 0, sizeof(value_type)); - NumElements = 0; - } - - bool insertInternal(KeyTy Ptr, MappedTy Val, value_type*& Item) { - // Check to see if it is already in the set. 
- value_type *Found; - if (lookupFor(Ptr, Found)) { - Item = Found; - return false; - } - if (NumElements < MaxArraySize) { - unsigned Idx = NumElements++; - Array[Idx] = std::make_pair(Ptr, Val); - Item = Array + Idx; - return true; - } - Item = Array + MaxArraySize; // return end() - return false; - } - - public: - - // Constructors - - FlatArrayMap() : EmptyKey(), EmptyValue() { - init(); - } - - FlatArrayMap(const self &that) : - EmptyKey(), EmptyValue() { - copyFrom(that); - } - - template<typename It> - FlatArrayMap(It I, It E) : - EmptyKey(), EmptyValue() { - init(); - insert(I, E); - } - - // Size - - unsigned size() const { - return NumElements; - } - - bool empty() const { - return !NumElements; - } - - // Iterators - - iterator begin() { - return iterator(Array); - } - const_iterator begin() const { - return const_iterator(Array); - } - - iterator end() { - return iterator(Array + NumElements); - } - const_iterator end() const { - return const_iterator(Array + NumElements); - } - - // Modifiers - - void clear() { - for (unsigned i = 0; i < NumElements; ++i) { - Array[i].first = EmptyKey; - Array[i].second = EmptyValue; - } - NumElements = 0; - } - - // The map container is extended by inserting a single new element. - // The behavior is the same as the std::map::insert, except the - // case when maximum number of elements is reached; - // in this case map declines any farther attempts - // to insert new elements ("insert" method returns <end(),false>). 
- std::pair<iterator, bool> insert(const value_type& KV) { - value_type* Item; - bool Res = insertInternal(KV.first, KV.second, Item); - return std::make_pair(iterator(Item), Res); - } - - template <typename IterT> - void insert(IterT I, IterT E) { - for (; I != E; ++I) - insert(*I); - } - - void erase(key_type K) { - unsigned Found = findFor(K); - if (Found != BadIndex) { - value_type *APtr = Array + Found; - value_type *E = Array + NumElements; - *APtr = E[-1]; - E[-1].first.~key_type(); - E[-1].second.~mapped_type(); - --NumElements; - } - } - - void erase(iterator i) { - erase(i->first); - } - - void swap(self& RHS) { - std::swap_ranges(Array, Array+MaxArraySize, RHS.Array); - std::swap(this->NumElements, RHS.NumElements); - } - - // Search operations - - iterator find(const key_type& K) { - value_type *Found; - if (lookupFor(K, Found)) - return iterator(Found); - return end(); - } - - const_iterator find(const key_type& K) const { - const value_type *Found; - if (lookupFor(K, Found)) - return const_iterator(Found); - return end(); - } - - bool count(const key_type& K) const { - return find(K) != end(); - } - - mapped_type &operator[](const key_type &Key) { - std::pair<iterator, bool> res = insert(Key, mapped_type()); - return res.first->second; - } - - // Other operations - - self& operator=(const self& other) { - clear(); - copyFrom(other); - return *this; - } - - /// isPointerIntoBucketsArray - Return true if the specified pointer points - /// somewhere into the map's array of buckets (i.e. either to a key or - /// value). - bool isPointerIntoBucketsArray(const void *Ptr) const { - return Ptr >= Array && Ptr < Array + NumElements; - } - - /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets - /// array. 
- const void *getPointerIntoBucketsArray() const { return Array; } - }; - - template<typename KeyTy, typename MappedTy, bool IsConst> - class FlatArrayMapIterator { - - typedef FlatArrayMapTypes<KeyTy, MappedTy> Types; - - typedef typename conditional<IsConst, - const typename Types::value_type, - typename Types::value_type>::type value_type; - typedef value_type *pointer; - typedef value_type &reference; - - typedef FlatArrayMapIterator<KeyTy, MappedTy, IsConst> self; - typedef FlatArrayMapIterator<KeyTy, MappedTy, false> non_const_self; - typedef FlatArrayMapIterator<KeyTy, MappedTy, true> const_self; - - friend class FlatArrayMapIterator<KeyTy, MappedTy, false>; - friend class FlatArrayMapIterator<KeyTy, MappedTy, true>; - - pointer TheBucket; - - public: - - FlatArrayMapIterator() : TheBucket(0) {} - - explicit FlatArrayMapIterator(pointer BP) : - TheBucket(BP) {} - - // If IsConst is true this is a converting constructor from iterator to - // const_iterator and the default copy constructor is used. - // Otherwise this is a copy constructor for iterator. 
- FlatArrayMapIterator(const non_const_self& I) - : TheBucket(I.TheBucket) {} - - bool operator==(const const_self &RHS) const { - return TheBucket->first == RHS.TheBucket->first; - } - bool operator!=(const const_self &RHS) const { - return TheBucket->first != RHS.TheBucket->first; - } - - reference operator*() const { - return *TheBucket; - } - - pointer operator->() const { - return TheBucket; - } - - inline self& operator++() { // Preincrement - ++TheBucket; - return *this; - } - - self operator++(int) { // Postincrement - FlatArrayMapIterator tmp = *this; ++*this; return tmp; - } - }; -} - -#endif /* FLATARRAYMAP_H_ */ diff --git a/include/llvm/ADT/MultiImplMap.h b/include/llvm/ADT/MultiImplMap.h deleted file mode 100644 index da453aa3c4..0000000000 --- a/include/llvm/ADT/MultiImplMap.h +++ /dev/null @@ -1,550 +0,0 @@ -//===- llvm/ADT/MultiImplMap.h - 'Normally small' pointer set ----*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MultiImplMap class. -// MultiImplMap presents map container interface. -// It has two modes, one for small amount of elements and one for big amount. -// User should set map implementation for both of them. User also should -// set the maximum possible number of elements for small mode. -// If user want to use MultiImplMap instead of DenseMap, he should pass -// DenseMapCompatible = true. Note that in this case map implementations should -// present additional DenseMap specific methods (see below). -// Initially MultiImplMap uses small mode and small map implementation. -// It triggered to the big mode when number of contained elements exceeds -// maximum possible elements for small mode. 
-// -// Types that should be defined in nested map class: -// -// key_type; -// mapped_type; -// value_type; // std::pair<key_type, mapped_type> -// // or std::pair<const key_type, mapped_type> -// iterator; -// const_iterator; -// -// Map implementation should provide the next interface: -// -// // Constructors -// (default constructor) -// (copy constructor) -// -// // Size -// unsigned size() const; -// bool empty() const; -// -// // Iterators -// iterator begin(); -// const_iterator begin(); -// iterator end(); -// const_iterator end(); -// -// // Modifiers -// void clear(); -// std::pair<iterator, bool> insert(const value_type& KV); -// template <typename IterT> -// void insert(IterT I, IterT E); -// void erase(key_type K); -// void erase(iterator i); -// void swap(MultiImplMap& rhs); -// -// // Search operations -// iterator find(const key_type& K); -// const_iterator find(const key_type& K) const; -// bool count(const key_type& K) const; -// mapped_type &operator[](const key_type &Key); -// -// // Other operations -// self& operator=(const self& other); -// -// // If DenseMapCompatible == true, you also should present next methods. -// // See DenseMap comments for more details about its behavior. 
-// bool isPointerIntoBucketsArray(const void *Ptr) const; -// const void *getPointerIntoBucketsArray() const; -// value_type& FindAndConstruct(const key_type &Key); -// -// The list of methods that should be implemented in nested map iterator class: -// -// (conversion constructor from non-constant iterator) -// -// bool operator==(const const_iterator& rhs) const; -// bool operator!=(const const_iterator& rhs) const; -// reference operator*() const; -// pointer operator->() const; -// inline self& operator++(); -// -// -//===----------------------------------------------------------------------===// - -#ifndef MULTIIMPLEMENTATIONMAP_H_ -#define MULTIIMPLEMENTATIONMAP_H_ - - -#include <algorithm> -#include <utility> -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FlatArrayMap.h" -#include "llvm/Support/type_traits.h" - -namespace llvm { - - template<class SmallMapTy, class BigMapTy, bool IsConst = false> - class MultiImplMapIterator; - - template<class SmallMapTy, class BigMapTy> - struct MultiImplMapIteratorsFactory; - - template<class SmallMapTy, class BigMapTy> - struct MultiImplMapTypes { - typedef typename SmallMapTy::key_type key_type; - typedef typename SmallMapTy::mapped_type mapped_type; - typedef typename std::pair<key_type, mapped_type> value_type; - }; - - //===--------------------------------------------------------------------===// - /// MultiImplMap is map that has two modes, one for small amount of - /// elements and one for big amount. - /// User should set map implementation for both of them. User also should - /// set the maximum possible number of elements for small mode. - /// If user want to use MultiImplMap instead of DenseMap, he should pass - /// DenseMapCompatible = true. - /// Initially MultiImplMap uses small mode and small map implementation. - /// It triggered to the big mode when number of contained elements exceeds - /// maximum possible elements for small mode. 
- template<class SmallMapTy, class BigMapTy, unsigned MaxSmallN, - bool DenseMapCompatible = false, - class ItFactory = - MultiImplMapIteratorsFactory<SmallMapTy, BigMapTy> > - class MultiImplMap { - - protected: - SmallMapTy SmallMap; - BigMapTy BigMap; - bool UseSmall; - enum { MaxSmallSize = MaxSmallN }; - - public: - typedef MultiImplMapTypes<SmallMapTy, BigMapTy> Types; - - typedef typename Types::key_type key_type; - typedef typename Types::mapped_type mapped_type; - typedef typename Types::value_type value_type; - - typedef typename ItFactory::iterator iterator; - typedef typename ItFactory::const_iterator const_iterator; - - typedef std::pair<iterator, bool> ins_res; - - typedef typename std::pair<typename SmallMapTy::iterator, bool> - small_ins_res; - - typedef typename std::pair<typename BigMapTy::iterator, bool> - big_ins_res; - - typedef MultiImplMap<SmallMapTy, BigMapTy, MaxSmallN> self; - - MultiImplMap() : UseSmall(true) {} - - MultiImplMap(const self& other) { - if (other.UseSmall) { - SmallMap = other.SmallMap; - UseSmall = true; - } else { - if (other.size() <= MaxSmallN) { - SmallMap.insert(other.BigMap.begin(), other.BigMap.end()); - UseSmall = true; - } else { - BigMap = other.BigMap; - UseSmall = false; - } - } - } - - // Size - - unsigned size() const { - if (UseSmall) - return SmallMap.size(); - return BigMap.size(); - } - - bool empty() const { - if (UseSmall) - return SmallMap.empty(); - return BigMap.empty(); - } - - // Iterators - - iterator begin() { - if (UseSmall) - return ItFactory::begin(SmallMap); - return ItFactory::begin(BigMap); - } - const_iterator begin() const { - if (UseSmall) - return ItFactory::begin(SmallMap); - return ItFactory::begin(BigMap); - } - - iterator end() { - if (UseSmall) - return ItFactory::end(SmallMap); - return ItFactory::end(BigMap); - } - const_iterator end() const { - if (UseSmall) - return ItFactory::end(SmallMap); - return ItFactory::end(BigMap); - } - - // Modifiers - - void clear() { - if 
(UseSmall) - SmallMap.clear(); - else - BigMap.clear(); - } - - std::pair<iterator, bool> insert(const value_type& KV) { - if (UseSmall) { - if (SmallMap.size() < MaxSmallSize) { - small_ins_res Res = SmallMap.insert(KV); - return std::make_pair(ItFactory::it(SmallMap, Res.first), Res.second); - } - - // Move all to big map. - BigMap.insert(SmallMap.begin(), SmallMap.end()); - SmallMap.clear(); - - UseSmall = false; - } - big_ins_res Res = BigMap.insert(KV); - return std::make_pair(ItFactory::it(BigMap, Res.first), Res.second); - } - - template <typename OtherValTy> - std::pair<iterator, bool> insert(const OtherValTy& OtherKV) { - const value_type* KV = reinterpret_cast<const value_type*>( - reinterpret_cast<const void*>(OtherKV)); - return insert(*KV); - } - - template <typename IterT> - void insert(IterT I, IterT E) { - for (; I != E; ++I) - insert(*I); - } - - void erase(key_type K) { - if (UseSmall) - SmallMap.erase(K); - else - BigMap.erase(K); - } - - void erase(iterator i) { - erase(i->first); - } - - void swap(MultiImplMap& rhs) { - SmallMap.swap(rhs.SmallMap); - BigMap.swap(rhs.BigMap); - std::swap(UseSmall, rhs.UseSmall); - } - - // Search operations - - iterator find(const key_type& K) { - if (UseSmall) - return ItFactory::it(SmallMap, SmallMap.find(K)); - return ItFactory::it(BigMap, BigMap.find(K)); - } - - const_iterator find(const key_type& K) const { - if (UseSmall) - return ItFactory::const_it(SmallMap, SmallMap.find(K)); - return ItFactory::const_it(BigMap, BigMap.find(K)); - } - - bool count(const key_type& K) const { - return find(K) != end(); - } - - mapped_type &operator[](const key_type &Key) { - ins_res res = insert(std::make_pair(Key, mapped_type())); - return res.first->second; - } - - // Other operations - - self& operator=(const self& other) { - if (other.isSmall()) { - SmallMap = other.SmallMap; - if (!UseSmall) { - BigMap.clear(); - UseSmall = true; - } - return *this; - } - if (UseSmall) { - SmallMap.clear(); - UseSmall = false; - } - 
BigMap = other.BigMap; - return *this; - } - - // Utilities - - bool isSmall()const { - return UseSmall; - } - - SmallMapTy& getSmallMap() { - return SmallMap; - } - - const SmallMapTy& getSmallMap() const { - return SmallMap; - } - - BigMapTy& getBigMap() { - return BigMap; - } - - const BigMapTy& getBigMap() const { - return BigMap; - } - }; - - template<class SmallMapTy, class BigMapTy, unsigned MaxSmallN> - class MultiImplMap<SmallMapTy, BigMapTy, MaxSmallN, true> : - public MultiImplMap<SmallMapTy, BigMapTy, MaxSmallN, false> - { - public: - typedef MultiImplMap<SmallMapTy, BigMapTy, MaxSmallN, false> ParentTy; - typedef typename ParentTy::Types Types; - - typedef typename Types::key_type key_type; - typedef typename Types::mapped_type mapped_type; - typedef typename Types::value_type value_type; - typedef typename ParentTy::iterator iterator; - - /// isPointerIntoBucketsArray - Return true if the specified pointer points - /// somewhere into the DenseMap's array of buckets (i.e. either to a key or - /// value). - bool isPointerIntoBucketsArray(const void *Ptr) const { - if (this->UseSmall) - return this->SmallMap.isPointerIntoBucketsArray(Ptr); - return this->BigMap.isPointerIntoBucketsArray(Ptr); - } - - /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets - /// array. In conjunction with the previous method, this can be used to - /// determine whether an insertion caused the map to reallocate data. 
- const void *getPointerIntoBucketsArray() const { - if (this->UseSmall) - return this->SmallMap.getPointerIntoBucketsArray(); - return this->BigMap.getPointerIntoBucketsArray(); - } - - value_type& FindAndConstruct(const key_type &Key) { - std::pair<iterator, bool> Res = - this->insert(std::make_pair(Key, mapped_type())); - return *Res.first; - } - }; - - template<class SmallMapTy, class BigMapTy, bool IsConst> - class MultiImplMapIterator { - public: - - typedef MultiImplMapTypes<SmallMapTy, BigMapTy> Types; - - typedef typename Types::mapped_type mapped_type; - - typedef typename conditional<IsConst, - const typename Types::value_type, - typename Types::value_type>::type value_type; - - typedef typename conditional<IsConst, - typename SmallMapTy::const_iterator, - typename SmallMapTy::iterator>::type - small_iterator; - - typedef typename conditional<IsConst, - typename BigMapTy::const_iterator, - typename BigMapTy::iterator>::type - big_iterator; - - typedef typename conditional<IsConst, const void*, void*>::type void_ptr_ty; - - typedef value_type *pointer; - typedef value_type &reference; - - typedef MultiImplMapIterator<SmallMapTy, BigMapTy, IsConst> self; - - typedef MultiImplMapIterator<SmallMapTy, BigMapTy, false> non_const_self; - typedef MultiImplMapIterator<SmallMapTy, BigMapTy, true> const_self; - - friend class MultiImplMapIterator<SmallMapTy, BigMapTy, true>; - friend class MultiImplMapIterator<SmallMapTy, BigMapTy, false>; - - protected: - - template <typename OtherValTy> - static value_type* toValueTypePtr(OtherValTy& ValTyRef) { - return reinterpret_cast<value_type*>( - reinterpret_cast<void_ptr_ty>(&ValTyRef)); - } - - template <typename OtherValTy> - static value_type& toValueTypeRef(OtherValTy& ValTyRef) { - return *reinterpret_cast<value_type*>( - reinterpret_cast<void_ptr_ty>(&ValTyRef)); - } - - small_iterator SmallIt; - big_iterator BigIt; - bool UseSmall; - - public: - - MultiImplMapIterator() : UseSmall(true) {} - 
MultiImplMapIterator(small_iterator It) : SmallIt(It), UseSmall(true) {} - MultiImplMapIterator(big_iterator It) : BigIt(It), UseSmall(false) {} - MultiImplMapIterator(const non_const_self& src) : - SmallIt(src.SmallIt), BigIt(src.BigIt), UseSmall(src.UseSmall) {} - - bool operator==(const const_self& rhs) const { - if (UseSmall != rhs.UseSmall) - return false; - if (UseSmall) - return SmallIt == rhs.SmallIt; - return BigIt == rhs.BigIt; - } - - bool operator!=(const const_self& rhs) const { - if (UseSmall != rhs.UseSmall) - return true; - if (UseSmall) - return SmallIt != rhs.SmallIt; - return BigIt != rhs.BigIt; - } - - reference operator*() const { - return UseSmall ? toValueTypeRef(*SmallIt) : toValueTypeRef(*BigIt);; - } - - pointer operator->() const { - return UseSmall ? toValueTypePtr(*SmallIt) : toValueTypePtr(*BigIt); - } - - // Preincrement - inline self& operator++() { - if (UseSmall) ++SmallIt; - return *this; - } - - // Postincrement - self operator++(int) { - self tmp = *this; ++*this; return tmp; - } - }; - - template<class SmallMapTy, class BigMapTy> - struct MultiImplMapIteratorsFactory { - - typedef MultiImplMapIterator<SmallMapTy, BigMapTy, false> iterator; - typedef MultiImplMapIterator<SmallMapTy, BigMapTy, true> const_iterator; - - template<class MapImpl, class ItTy> - static iterator it(MapImpl& impl, ItTy it) { - return iterator(it); - } - template<class MapImpl, class ConstItTy> - static const_iterator const_it(const MapImpl& impl, ConstItTy it) { - return const_iterator(it); - } - template<class MapImpl> - static iterator begin(MapImpl& impl) { - return iterator(impl.begin()); - } - template<class MapImpl> - static const_iterator begin(const MapImpl& impl) { - return const_iterator(impl.begin()); - } - template<class MapImpl> - static iterator end(MapImpl& impl) { - return iterator(impl.end()); - } - template<class MapImpl> - static const_iterator end(const MapImpl& impl) { - return const_iterator(impl.end()); - } - }; - - 
template<typename KeyTy, typename MappedTy, unsigned MaxArraySize, - typename KeyInfoT> - struct MultiImplMapIteratorsFactory< - FlatArrayMap<KeyTy, MappedTy, MaxArraySize>, - DenseMap<KeyTy, MappedTy, KeyInfoT> > - { - - typedef FlatArrayMap<KeyTy, MappedTy, MaxArraySize> SmallMapTy; - typedef DenseMap<KeyTy, MappedTy, KeyInfoT> BigMapTy; - - typedef DenseMapIterator<KeyTy, MappedTy, KeyInfoT, false> - iterator; - typedef DenseMapIterator<KeyTy, MappedTy, KeyInfoT, true> - const_iterator; - - static iterator it(SmallMapTy& impl, typename SmallMapTy::iterator it) { - return iterator(&(*it), &(*impl.end())); - } - static const_iterator const_it( - const SmallMapTy& impl, typename SmallMapTy::const_iterator it) { - return const_iterator(&(*it), &(*impl.end())); - } - static iterator it(BigMapTy& impl, typename BigMapTy::iterator it) { - return it; - } - static const_iterator const_it( - const BigMapTy& impl, typename BigMapTy::const_iterator it) { - return it; - } - static iterator begin(SmallMapTy& impl) { - return it(impl, impl.begin()); - } - static const_iterator begin(const SmallMapTy& impl) { - return it(impl, impl.begin()); - } - static iterator begin(BigMapTy& impl) { - return impl.begin(); - } - static const_iterator begin(const BigMapTy& impl) { - return impl.begin(); - } - static iterator end(SmallMapTy& impl) { - return it(impl, impl.end()); - } - static const_iterator end(const SmallMapTy& impl) { - return const_it(impl, impl.end()); - } - static iterator end(BigMapTy& impl) { - return impl.end(); - } - static const_iterator end(const BigMapTy& impl) { - return impl.end(); - } - }; -} - -#endif /* MULTIIMPLEMENTATIONMAP_H_ */ diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 58a23d886f..aee500d4fb 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -59,7 +59,7 @@ struct greater_ptr : public std::binary_function<Ty, Ty, bool> { // for_each(V.begin(), B.end(), deleter<Interval>); // template <class 
T> -static inline void deleter(T *Ptr) { +inline void deleter(T *Ptr) { delete Ptr; } @@ -238,7 +238,7 @@ inline size_t array_lengthof(T (&)[N]) { /// array_pod_sort_comparator - This is helper function for array_pod_sort, /// which just uses operator< on T. template<typename T> -static inline int array_pod_sort_comparator(const void *P1, const void *P2) { +inline int array_pod_sort_comparator(const void *P1, const void *P2) { if (*reinterpret_cast<const T*>(P1) < *reinterpret_cast<const T*>(P2)) return -1; if (*reinterpret_cast<const T*>(P2) < *reinterpret_cast<const T*>(P1)) @@ -249,7 +249,7 @@ static inline int array_pod_sort_comparator(const void *P1, const void *P2) { /// get_array_pad_sort_comparator - This is an internal helper function used to /// get type deduction of T right. template<typename T> -static int (*get_array_pad_sort_comparator(const T &)) +inline int (*get_array_pad_sort_comparator(const T &)) (const void*, const void*) { return array_pod_sort_comparator<T>; } @@ -270,7 +270,7 @@ static int (*get_array_pad_sort_comparator(const T &)) /// NOTE: If qsort_r were portable, we could allow a custom comparator and /// default to std::less. template<class IteratorTy> -static inline void array_pod_sort(IteratorTy Start, IteratorTy End) { +inline void array_pod_sort(IteratorTy Start, IteratorTy End) { // Don't dereference start iterator of empty sequence. if (Start == End) return; qsort(&*Start, End-Start, sizeof(*Start), @@ -278,7 +278,7 @@ static inline void array_pod_sort(IteratorTy Start, IteratorTy End) { } template<class IteratorTy> -static inline void array_pod_sort(IteratorTy Start, IteratorTy End, +inline void array_pod_sort(IteratorTy Start, IteratorTy End, int (*Compare)(const void*, const void*)) { // Don't dereference start iterator of empty sequence. 
if (Start == End) return; diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index d43c7afb10..7a645e0c72 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -354,6 +354,19 @@ public: return (*this)[Idx]; } + /// Test if any common bits are set. + bool anyCommon(const SmallBitVector &RHS) const { + if (isSmall() && RHS.isSmall()) + return (getSmallBits() & RHS.getSmallBits()) != 0; + if (!isSmall() && !RHS.isSmall()) + return getPointer()->anyCommon(*RHS.getPointer()); + + for (unsigned i = 0, e = std::min(size(), RHS.size()); i != e; ++i) + if (test(i) && RHS.test(i)) + return true; + return false; + } + // Comparison operators. bool operator==(const SmallBitVector &RHS) const { if (size() != RHS.size()) @@ -442,6 +455,59 @@ public: void swap(SmallBitVector &RHS) { std::swap(X, RHS.X); } + + /// setBitsInMask - Add '1' bits from Mask to this vector. Don't resize. + /// This computes "*this |= Mask". + void setBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + if (isSmall()) + applyMask<true, false>(Mask, MaskWords); + else + getPointer()->setBitsInMask(Mask, MaskWords); + } + + /// clearBitsInMask - Clear any bits in this vector that are set in Mask. + /// Don't resize. This computes "*this &= ~Mask". + void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + if (isSmall()) + applyMask<false, false>(Mask, MaskWords); + else + getPointer()->clearBitsInMask(Mask, MaskWords); + } + + /// setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask. + /// Don't resize. This computes "*this |= ~Mask". + void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + if (isSmall()) + applyMask<true, true>(Mask, MaskWords); + else + getPointer()->setBitsNotInMask(Mask, MaskWords); + } + + /// clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask. + /// Don't resize. This computes "*this &= Mask". 
+ void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) { + if (isSmall()) + applyMask<false, true>(Mask, MaskWords); + else + getPointer()->clearBitsNotInMask(Mask, MaskWords); + } + +private: + template<bool AddBits, bool InvertMask> + void applyMask(const uint32_t *Mask, unsigned MaskWords) { + assert((NumBaseBits == 64 || NumBaseBits == 32) && "Unsupported word size"); + if (NumBaseBits == 64 && MaskWords >= 2) { + uint64_t M = Mask[0] | (uint64_t(Mask[1]) << 32); + if (InvertMask) M = ~M; + if (AddBits) setSmallBits(getSmallBits() | M); + else setSmallBits(getSmallBits() & ~M); + } else { + uint32_t M = Mask[0]; + if (InvertMask) M = ~M; + if (AddBits) setSmallBits(getSmallBits() | M); + else setSmallBits(getSmallBits() & ~M); + } + } }; inline SmallBitVector diff --git a/include/llvm/ADT/SmallMap.h b/include/llvm/ADT/SmallMap.h deleted file mode 100644 index 42d27ce77e..0000000000 --- a/include/llvm/ADT/SmallMap.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- llvm/ADT/SmallMap.h - 'Normally small' pointer set -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the SmallMap class. -// SmallMap is DenseMap compatible MultiImplMap. -// It uses FlatArrayMap for small mode, and DenseMap for big mode. -// See MultiMapImpl comments for more details on the algorithm is used. -// -//===----------------------------------------------------------------------===// - -#ifndef SMALLPTRMAP_H_ -#define SMALLPTRMAP_H_ - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/FlatArrayMap.h" -#include "llvm/ADT/MultiImplMap.h" - -namespace llvm { - - //===--------------------------------------------------------------------===// - /// SmallMap is wrapper around MultiImplMap. 
It uses FlatArrayMap for - /// small mode, and DenseMap for big mode. - template <typename KeyTy, typename MappedTy, unsigned N = 16> - class SmallMap : public MultiImplMap< - FlatArrayMap<KeyTy, MappedTy, N>, - DenseMap<KeyTy, MappedTy>, - N, true> { - }; -} - -#endif /* SMALLPTRMAP_H_ */ diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 973e0284ab..2d71249c52 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -540,14 +540,14 @@ public: } iterator insert(iterator I, size_type NumToInsert, const T &Elt) { + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + if (I == this->end()) { // Important special case for empty vector. append(NumToInsert, Elt); - return this->end()-1; + return this->begin()+InsertElt; } - // Convert iterator to elt# to avoid invalidating iterator when we reserve() - size_t InsertElt = I - this->begin(); - // Ensure there is enough space. reserve(static_cast<unsigned>(this->size() + NumToInsert)); @@ -588,14 +588,15 @@ public: template<typename ItTy> iterator insert(iterator I, ItTy From, ItTy To) { + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + if (I == this->end()) { // Important special case for empty vector. append(From, To); - return this->end()-1; + return this->begin()+InsertElt; } size_t NumToInsert = std::distance(From, To); - // Convert iterator to elt# to avoid invalidating iterator when we reserve() - size_t InsertElt = I - this->begin(); // Ensure there is enough space. reserve(static_cast<unsigned>(this->size() + NumToInsert)); @@ -628,9 +629,9 @@ public: this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); // Replace the overwritten part. 
- for (; NumOverwritten > 0; --NumOverwritten) { - *I = *From; - ++I; ++From; + for (T *J = I; NumOverwritten > 0; --NumOverwritten) { + *J = *From; + ++J; ++From; } // Insert the non-overwritten middle part. diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 6e8e424636..45be59b974 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -152,7 +152,7 @@ EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase<BasicBlock>); EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); template<class NodeT> -static raw_ostream &operator<<(raw_ostream &o, +inline raw_ostream &operator<<(raw_ostream &o, const DomTreeNodeBase<NodeT> *Node) { if (Node->getBlock()) WriteAsOperand(o, Node->getBlock(), false); @@ -165,7 +165,7 @@ static raw_ostream &operator<<(raw_ostream &o, } template<class NodeT> -static void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &o, +inline void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &o, unsigned Lev) { o.indent(2*Lev) << "[" << Lev << "] " << N; for (typename DomTreeNodeBase<NodeT>::const_iterator I = N->begin(), diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 91feaaac03..eeb482d82a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -46,7 +46,7 @@ namespace llvm { template<typename T> -static void RemoveFromVector(std::vector<T*> &V, T *N) { +inline void RemoveFromVector(std::vector<T*> &V, T *N) { typename std::vector<T*>::iterator I = std::find(V.begin(), V.end(), N); assert(I != V.end() && "N is not in this list!"); V.erase(I); @@ -97,6 +97,9 @@ public: BlockT *getHeader() const { return Blocks.front(); } LoopT *getParentLoop() const { return ParentLoop; } + /// setParentLoop is a raw interface for bypassing addChildLoop. 
+ void setParentLoop(LoopT *L) { ParentLoop = L; } + /// contains - Return true if the specified loop is contained within in /// this loop. /// @@ -122,14 +125,20 @@ public: /// iterator/begin/end - Return the loops contained entirely within this loop. /// const std::vector<LoopT *> &getSubLoops() const { return SubLoops; } + std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; } typedef typename std::vector<LoopT *>::const_iterator iterator; + typedef typename std::vector<LoopT *>::const_reverse_iterator + reverse_iterator; iterator begin() const { return SubLoops.begin(); } iterator end() const { return SubLoops.end(); } + reverse_iterator rbegin() const { return SubLoops.rbegin(); } + reverse_iterator rend() const { return SubLoops.rend(); } bool empty() const { return SubLoops.empty(); } /// getBlocks - Get a list of the basic blocks which make up this loop. /// const std::vector<BlockT*> &getBlocks() const { return Blocks; } + std::vector<BlockT*> &getBlocksVector() { return Blocks; } typedef typename std::vector<BlockT*>::const_iterator block_iterator; block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } @@ -181,83 +190,26 @@ public: /// outside of the loop. These are the blocks _inside of the current loop_ /// which branch out. The returned list is always unique. /// - void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); - std::sort(LoopBBs.begin(), LoopBBs.end()); - - typedef GraphTraits<BlockT*> BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) { - // Not in current loop? 
It must be an exit block. - ExitingBlocks.push_back(*BI); - break; - } - } + void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const; /// getExitingBlock - If getExitingBlocks would return exactly one block, /// return that block. Otherwise return null. - BlockT *getExitingBlock() const { - SmallVector<BlockT*, 8> ExitingBlocks; - getExitingBlocks(ExitingBlocks); - if (ExitingBlocks.size() == 1) - return ExitingBlocks[0]; - return 0; - } + BlockT *getExitingBlock() const; /// getExitBlocks - Return all of the successor blocks of this loop. These /// are the blocks _outside of the current loop_ which are branched to. /// - void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); - std::sort(LoopBBs.begin(), LoopBBs.end()); - - typedef GraphTraits<BlockT*> BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) - // Not in current loop? It must be an exit block. - ExitBlocks.push_back(*I); - } + void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const; /// getExitBlock - If getExitBlocks would return exactly one block, /// return that block. Otherwise return null. - BlockT *getExitBlock() const { - SmallVector<BlockT*, 8> ExitBlocks; - getExitBlocks(ExitBlocks); - if (ExitBlocks.size() == 1) - return ExitBlocks[0]; - return 0; - } + BlockT *getExitBlock() const; /// Edge type. - typedef std::pair<BlockT*, BlockT*> Edge; + typedef std::pair<const BlockT*, const BlockT*> Edge; /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). 
- template <typename EdgeT> - void getExitEdges(SmallVectorImpl<EdgeT> &ExitEdges) const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); - array_pod_sort(LoopBBs.begin(), LoopBBs.end()); - - typedef GraphTraits<BlockT*> BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) - // Not in current loop? It must be an exit block. - ExitEdges.push_back(EdgeT(*BI, *I)); - } + void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const; /// getLoopPreheader - If there is a preheader for this loop, return it. A /// loop has a preheader if there is only one edge to the header of the loop @@ -266,71 +218,18 @@ public: /// /// This method returns null if there is no preheader for the loop. /// - BlockT *getLoopPreheader() const { - // Keep track of nodes outside the loop branching to the header... - BlockT *Out = getLoopPredecessor(); - if (!Out) return 0; - - // Make sure there is only one exit out of the preheader. - typedef GraphTraits<BlockT*> BlockTraits; - typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); - ++SI; - if (SI != BlockTraits::child_end(Out)) - return 0; // Multiple exits from the block, must not be a preheader. - - // The predecessor has exactly one successor, so it is a preheader. - return Out; - } + BlockT *getLoopPreheader() const; /// getLoopPredecessor - If the given loop's header has exactly one unique /// predecessor outside the loop, return it. Otherwise return null. /// This is less strict that the loop "preheader" concept, which requires /// the predecessor to have exactly one successor. /// - BlockT *getLoopPredecessor() const { - // Keep track of nodes outside the loop branching to the header... 
- BlockT *Out = 0; - - // Loop over the predecessors of the header node... - BlockT *Header = getHeader(); - typedef GraphTraits<BlockT*> BlockTraits; - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(Header), - PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { - typename InvBlockTraits::NodeType *N = *PI; - if (!contains(N)) { // If the block is not in the loop... - if (Out && Out != N) - return 0; // Multiple predecessors outside the loop - Out = N; - } - } - - // Make sure there is only one exit out of the preheader. - assert(Out && "Header of loop has no predecessors from outside loop?"); - return Out; - } + BlockT *getLoopPredecessor() const; /// getLoopLatch - If there is a single latch block for this loop, return it. /// A latch block is a block that contains a branch back to the header. - BlockT *getLoopLatch() const { - BlockT *Header = getHeader(); - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; - typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(Header); - typename InvBlockTraits::ChildIteratorType PE = - InvBlockTraits::child_end(Header); - BlockT *Latch = 0; - for (; PI != PE; ++PI) { - typename InvBlockTraits::NodeType *N = *PI; - if (contains(N)) { - if (Latch) return 0; - Latch = N; - } - } - - return Latch; - } + BlockT *getLoopLatch() const; //===--------------------------------------------------------------------===// // APIs for updating loop information after changing the CFG @@ -348,17 +247,7 @@ public: /// the OldChild entry in our children list with NewChild, and updates the /// parent pointer of OldChild to be null and the NewChild to be this loop. /// This updates the loop depth of the new child. 
- void replaceChildLoopWith(LoopT *OldChild, - LoopT *NewChild) { - assert(OldChild->ParentLoop == this && "This loop is already broken!"); - assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); - typename std::vector<LoopT *>::iterator I = - std::find(SubLoops.begin(), SubLoops.end(), OldChild); - assert(I != SubLoops.end() && "OldChild not in loop!"); - *I = NewChild; - OldChild->ParentLoop = 0; - NewChild->ParentLoop = static_cast<LoopT *>(this); - } + void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild); /// addChildLoop - Add the specified loop to be a child of this loop. This /// updates the loop depth of the new child. @@ -411,121 +300,12 @@ public: } /// verifyLoop - Verify loop structure - void verifyLoop() const { -#ifndef NDEBUG - assert(!Blocks.empty() && "Loop header is missing"); - - // Setup for using a depth-first iterator to visit every block in the loop. - SmallVector<BlockT*, 8> ExitBBs; - getExitBlocks(ExitBBs); - llvm::SmallPtrSet<BlockT*, 8> VisitSet; - VisitSet.insert(ExitBBs.begin(), ExitBBs.end()); - df_ext_iterator<BlockT*, llvm::SmallPtrSet<BlockT*, 8> > - BI = df_ext_begin(getHeader(), VisitSet), - BE = df_ext_end(getHeader(), VisitSet); - - // Keep track of the number of BBs visited. - unsigned NumVisited = 0; - - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); - std::sort(LoopBBs.begin(), LoopBBs.end()); - - // Check the individual blocks. 
- for ( ; BI != BE; ++BI) { - BlockT *BB = *BI; - bool HasInsideLoopSuccs = false; - bool HasInsideLoopPreds = false; - SmallVector<BlockT *, 2> OutsideLoopPreds; - - typedef GraphTraits<BlockT*> BlockTraits; - for (typename BlockTraits::ChildIteratorType SI = - BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB); - SI != SE; ++SI) - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *SI)) { - HasInsideLoopSuccs = true; - break; - } - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); - PI != PE; ++PI) { - BlockT *N = *PI; - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N)) - HasInsideLoopPreds = true; - else - OutsideLoopPreds.push_back(N); - } - - if (BB == getHeader()) { - assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); - } else if (!OutsideLoopPreds.empty()) { - // A non-header loop shouldn't be reachable from outside the loop, - // though it is permitted if the predecessor is not itself actually - // reachable. - BlockT *EntryBB = BB->getParent()->begin(); - for (df_iterator<BlockT *> NI = df_begin(EntryBB), - NE = df_end(EntryBB); NI != NE; ++NI) - for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) - assert(*NI != OutsideLoopPreds[i] && - "Loop has multiple entry points!"); - } - assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); - assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); - assert(BB != getHeader()->getParent()->begin() && - "Loop contains function entry block!"); - - NumVisited++; - } - - assert(NumVisited == getNumBlocks() && "Unreachable block in loop"); - - // Check the subloops. - for (iterator I = begin(), E = end(); I != E; ++I) - // Each block in each subloop should be contained within this loop. 
- for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); - BI != BE; ++BI) { - assert(std::binary_search(LoopBBs.begin(), LoopBBs.end(), *BI) && - "Loop does not contain all the blocks of a subloop!"); - } - - // Check the parent loop pointer. - if (ParentLoop) { - assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) != - ParentLoop->end() && - "Loop is not a subloop of its parent!"); - } -#endif - } + void verifyLoop() const; /// verifyLoop - Verify loop structure of this loop and all nested loops. - void verifyLoopNest(DenseSet<const LoopT*> *Loops) const { - Loops->insert(static_cast<const LoopT *>(this)); - // Verify this loop. - verifyLoop(); - // Verify the subloops. - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->verifyLoopNest(Loops); - } + void verifyLoopNest(DenseSet<const LoopT*> *Loops) const; - void print(raw_ostream &OS, unsigned Depth = 0) const { - OS.indent(Depth*2) << "Loop at depth " << getLoopDepth() - << " containing: "; - - for (unsigned i = 0; i < getBlocks().size(); ++i) { - if (i) OS << ","; - BlockT *BB = getBlocks()[i]; - WriteAsOperand(OS, BB, false); - if (BB == getHeader()) OS << "<header>"; - if (BB == getLoopLatch()) OS << "<latch>"; - if (isLoopExiting(BB)) OS << "<exiting>"; - } - OS << "\n"; - - for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->print(OS, Depth+2); - } + void print(raw_ostream &OS, unsigned Depth = 0) const; protected: friend class LoopInfoBase<BlockT, LoopT>; @@ -540,6 +320,11 @@ raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) { return OS; } +// Implementation in LoopInfoImpl.h +#ifdef __GNUC__ +__extension__ extern template class LoopBase<BasicBlock, Loop>; +#endif + class Loop : public LoopBase<BasicBlock, Loop> { public: Loop() {} @@ -650,8 +435,12 @@ public: /// function. 
/// typedef typename std::vector<LoopT *>::const_iterator iterator; + typedef typename std::vector<LoopT *>::const_reverse_iterator + reverse_iterator; iterator begin() const { return TopLevelLoops.begin(); } iterator end() const { return TopLevelLoops.end(); } + reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); } + reverse_iterator rend() const { return TopLevelLoops.rend(); } bool empty() const { return TopLevelLoops.empty(); } /// getLoopFor - Return the inner most loop that BB lives in. If a basic @@ -744,189 +533,19 @@ public: return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop); } - void Calculate(DominatorTreeBase<BlockT> &DT) { - BlockT *RootNode = DT.getRootNode()->getBlock(); - - for (df_iterator<BlockT*> NI = df_begin(RootNode), - NE = df_end(RootNode); NI != NE; ++NI) - if (LoopT *L = ConsiderForLoop(*NI, DT)) - TopLevelLoops.push_back(L); - } - - LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) { - if (BBMap.count(BB)) return 0; // Haven't processed this node? - - std::vector<BlockT *> TodoStack; - - // Scan the predecessors of BB, checking to see if BB dominates any of - // them. This identifies backedges which target this node... - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType I = - InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB); - I != E; ++I) { - typename InvBlockTraits::NodeType *N = *I; - // If BB dominates its predecessor... - if (DT.dominates(BB, N) && DT.isReachableFromEntry(N)) - TodoStack.push_back(N); - } - - if (TodoStack.empty()) return 0; // No backedges to this block... - - // Create a new loop to represent this basic block... - LoopT *L = new LoopT(BB); - BBMap[BB] = L; - - while (!TodoStack.empty()) { // Process all the nodes in the loop - BlockT *X = TodoStack.back(); - TodoStack.pop_back(); - - if (!L->contains(X) && // As of yet unprocessed?? 
- DT.isReachableFromEntry(X)) { - // Check to see if this block already belongs to a loop. If this occurs - // then we have a case where a loop that is supposed to be a child of - // the current loop was processed before the current loop. When this - // occurs, this child loop gets added to a part of the current loop, - // making it a sibling to the current loop. We have to reparent this - // loop. - if (LoopT *SubLoop = - const_cast<LoopT *>(getLoopFor(X))) - if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){ - // Remove the subloop from its current parent... - assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L); - LoopT *SLP = SubLoop->ParentLoop; // SubLoopParent - typename std::vector<LoopT *>::iterator I = - std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop); - assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?"); - SLP->SubLoops.erase(I); // Remove from parent... - - // Add the subloop to THIS loop... - SubLoop->ParentLoop = L; - L->SubLoops.push_back(SubLoop); - } - - // Normal case, add the block to our loop... - L->Blocks.push_back(X); - - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; - - // Add all of the predecessors of X to the end of the work stack... - TodoStack.insert(TodoStack.end(), InvBlockTraits::child_begin(X), - InvBlockTraits::child_end(X)); - } - } - - // If there are any loops nested within this loop, create them now! - for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(), - E = L->Blocks.end(); I != E; ++I) - if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) { - L->SubLoops.push_back(NewLoop); - NewLoop->ParentLoop = L; - } - - // Add the basic blocks that comprise this loop to the BBMap so that this - // loop can be found for them. 
- // - for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(), - E = L->Blocks.end(); I != E; ++I) - BBMap.insert(std::make_pair(*I, L)); - - // Now that we have a list of all of the child loops of this loop, check to - // see if any of them should actually be nested inside of each other. We - // can accidentally pull loops our of their parents, so we must make sure to - // organize the loop nests correctly now. - { - std::map<BlockT *, LoopT *> ContainingLoops; - for (unsigned i = 0; i != L->SubLoops.size(); ++i) { - LoopT *Child = L->SubLoops[i]; - assert(Child->getParentLoop() == L && "Not proper child loop?"); - - if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) { - // If there is already a loop which contains this loop, move this loop - // into the containing loop. - MoveSiblingLoopInto(Child, ContainingLoop); - --i; // The loop got removed from the SubLoops list. - } else { - // This is currently considered to be a top-level loop. Check to see - // if any of the contained blocks are loop headers for subloops we - // have already processed. - for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) { - LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]]; - if (BlockLoop == 0) { // Child block not processed yet... - BlockLoop = Child; - } else if (BlockLoop != Child) { - LoopT *SubLoop = BlockLoop; - // Reparent all of the blocks which used to belong to BlockLoops - for (unsigned j = 0, f = SubLoop->Blocks.size(); j != f; ++j) - ContainingLoops[SubLoop->Blocks[j]] = Child; - - // There is already a loop which contains this block, that means - // that we should reparent the loop which the block is currently - // considered to belong to to be a child of this loop. - MoveSiblingLoopInto(SubLoop, Child); - --i; // We just shrunk the SubLoops list. - } - } - } - } - } - - return L; - } - - /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside - /// of the NewParent Loop, instead of being a sibling of it. 
- void MoveSiblingLoopInto(LoopT *NewChild, - LoopT *NewParent) { - LoopT *OldParent = NewChild->getParentLoop(); - assert(OldParent && OldParent == NewParent->getParentLoop() && - NewChild != NewParent && "Not sibling loops!"); - - // Remove NewChild from being a child of OldParent - typename std::vector<LoopT *>::iterator I = - std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(), - NewChild); - assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??"); - OldParent->SubLoops.erase(I); // Remove from parent's subloops list - NewChild->ParentLoop = 0; - - InsertLoopInto(NewChild, NewParent); - } - - /// InsertLoopInto - This inserts loop L into the specified parent loop. If - /// the parent loop contains a loop which should contain L, the loop gets - /// inserted into L instead. - void InsertLoopInto(LoopT *L, LoopT *Parent) { - BlockT *LHeader = L->getHeader(); - assert(Parent->contains(LHeader) && - "This loop should not be inserted here!"); - - // Check to see if it belongs in a child loop... - for (unsigned i = 0, e = static_cast<unsigned>(Parent->SubLoops.size()); - i != e; ++i) - if (Parent->SubLoops[i]->contains(LHeader)) { - InsertLoopInto(L, Parent->SubLoops[i]); - return; - } - - // If not, insert it here! - Parent->SubLoops.push_back(L); - L->ParentLoop = Parent; - } + /// Create the loop forest using a stable algorithm. 
+ void Analyze(DominatorTreeBase<BlockT> &DomTree); // Debugging - void print(raw_ostream &OS) const { - for (unsigned i = 0; i < TopLevelLoops.size(); ++i) - TopLevelLoops[i]->print(OS); - #if 0 - for (DenseMap<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(), - E = BBMap.end(); I != E; ++I) - OS << "BB '" << I->first->getName() << "' level = " - << I->second->getLoopDepth() << "\n"; - #endif - } + void print(raw_ostream &OS) const; }; +// Implementation in LoopInfoImpl.h +#ifdef __GNUC__ +__extension__ extern template class LoopInfoBase<BasicBlock, Loop>; +#endif + class LoopInfo : public FunctionPass { LoopInfoBase<BasicBlock, Loop> LI; friend class LoopBase<BasicBlock, Loop>; @@ -946,8 +565,11 @@ public: /// function. /// typedef LoopInfoBase<BasicBlock, Loop>::iterator iterator; + typedef LoopInfoBase<BasicBlock, Loop>::reverse_iterator reverse_iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } + inline reverse_iterator rbegin() const { return LI.rbegin(); } + inline reverse_iterator rend() const { return LI.rend(); } bool empty() const { return LI.empty(); } /// getLoopFor - Return the inner most loop that BB lives in. If a basic @@ -1074,27 +696,6 @@ template <> struct GraphTraits<Loop*> { } }; -template<class BlockT, class LoopT> -void -LoopBase<BlockT, LoopT>::addBasicBlockToLoop(BlockT *NewBB, - LoopInfoBase<BlockT, LoopT> &LIB) { - assert((Blocks.empty() || LIB[getHeader()] == this) && - "Incorrect LI specified for this loop!"); - assert(NewBB && "Cannot add a null basic block to the loop!"); - assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!"); - - LoopT *L = static_cast<LoopT *>(this); - - // Add the loop mapping to the LoopInfo object... - LIB.BBMap[NewBB] = L; - - // Add the basic block to this loop and all parent loops... 
- while (L) { - L->Blocks.push_back(NewBB); - L = L->getParentLoop(); - } -} - } // End llvm namespace #endif diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h new file mode 100644 index 0000000000..c07fbf7aa8 --- /dev/null +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -0,0 +1,570 @@ +//===- llvm/Analysis/LoopInfoImpl.h - Natural Loop Calculator ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the generic implementation of LoopInfo used for both Loops and +// MachineLoops. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOOP_INFO_IMPL_H +#define LLVM_ANALYSIS_LOOP_INFO_IMPL_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/PostOrderIterator.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +// APIs for simple analysis of the loop. See header notes. + +/// getExitingBlocks - Return all blocks inside the loop that have successors +/// outside of the loop. These are the blocks _inside of the current loop_ +/// which branch out. The returned list is always unique. +/// +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>:: +getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. 
+ SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + typedef GraphTraits<BlockT*> BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) + for (typename BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) + if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) { + // Not in current loop? It must be an exit block. + ExitingBlocks.push_back(*BI); + break; + } +} + +/// getExitingBlock - If getExitingBlocks would return exactly one block, +/// return that block. Otherwise return null. +template<class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const { + SmallVector<BlockT*, 8> ExitingBlocks; + getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() == 1) + return ExitingBlocks[0]; + return 0; +} + +/// getExitBlocks - Return all of the successor blocks of this loop. These +/// are the blocks _outside of the current loop_ which are branched to. +/// +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>:: +getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + typedef GraphTraits<BlockT*> BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) + for (typename BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) + if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + // Not in current loop? It must be an exit block. + ExitBlocks.push_back(*I); +} + +/// getExitBlock - If getExitBlocks would return exactly one block, +/// return that block. Otherwise return null. 
+template<class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const { + SmallVector<BlockT*, 8> ExitBlocks; + getExitBlocks(ExitBlocks); + if (ExitBlocks.size() == 1) + return ExitBlocks[0]; + return 0; +} + +/// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>:: +getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); + array_pod_sort(LoopBBs.begin(), LoopBBs.end()); + + typedef GraphTraits<BlockT*> BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) + for (typename BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) + if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + // Not in current loop? It must be an exit block. + ExitEdges.push_back(Edge(*BI, *I)); +} + +/// getLoopPreheader - If there is a preheader for this loop, return it. A +/// loop has a preheader if there is only one edge to the header of the loop +/// from outside of the loop. If this is the case, the block branching to the +/// header of the loop is the preheader node. +/// +/// This method returns null if there is no preheader for the loop. +/// +template<class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { + // Keep track of nodes outside the loop branching to the header... + BlockT *Out = getLoopPredecessor(); + if (!Out) return 0; + + // Make sure there is only one exit out of the preheader. + typedef GraphTraits<BlockT*> BlockTraits; + typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); + ++SI; + if (SI != BlockTraits::child_end(Out)) + return 0; // Multiple exits from the block, must not be a preheader. 
+ + // The predecessor has exactly one successor, so it is a preheader. + return Out; +} + +/// getLoopPredecessor - If the given loop's header has exactly one unique +/// predecessor outside the loop, return it. Otherwise return null. +/// This is less strict that the loop "preheader" concept, which requires +/// the predecessor to have exactly one successor. +/// +template<class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { + // Keep track of nodes outside the loop branching to the header... + BlockT *Out = 0; + + // Loop over the predecessors of the header node... + BlockT *Header = getHeader(); + typedef GraphTraits<BlockT*> BlockTraits; + typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(Header), + PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (!contains(N)) { // If the block is not in the loop... + if (Out && Out != N) + return 0; // Multiple predecessors outside the loop + Out = N; + } + } + + // Make sure there is only one exit out of the preheader. + assert(Out && "Header of loop has no predecessors from outside loop?"); + return Out; +} + +/// getLoopLatch - If there is a single latch block for this loop, return it. +/// A latch block is a block that contains a branch back to the header. 
+template<class BlockT, class LoopT> +BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const { + BlockT *Header = getHeader(); + typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; + typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(Header); + typename InvBlockTraits::ChildIteratorType PE = + InvBlockTraits::child_end(Header); + BlockT *Latch = 0; + for (; PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (contains(N)) { + if (Latch) return 0; + Latch = N; + } + } + + return Latch; +} + +//===----------------------------------------------------------------------===// +// APIs for updating loop information after changing the CFG +// + +/// addBasicBlockToLoop - This method is used by other analyses to update loop +/// information. NewBB is set to be a new member of the current loop. +/// Because of this, it is added as a member of all parent loops, and is added +/// to the specified LoopInfo object as being in the current basic block. It +/// is not valid to replace the loop header with this method. +/// +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>:: +addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) { + assert((Blocks.empty() || LIB[getHeader()] == this) && + "Incorrect LI specified for this loop!"); + assert(NewBB && "Cannot add a null basic block to the loop!"); + assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!"); + + LoopT *L = static_cast<LoopT *>(this); + + // Add the loop mapping to the LoopInfo object... + LIB.BBMap[NewBB] = L; + + // Add the basic block to this loop and all parent loops... + while (L) { + L->Blocks.push_back(NewBB); + L = L->getParentLoop(); + } +} + +/// replaceChildLoopWith - This is used when splitting loops up. It replaces +/// the OldChild entry in our children list with NewChild, and updates the +/// parent pointer of OldChild to be null and the NewChild to be this loop. +/// This updates the loop depth of the new child. 
+template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>:: +replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild) { + assert(OldChild->ParentLoop == this && "This loop is already broken!"); + assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); + typename std::vector<LoopT *>::iterator I = + std::find(SubLoops.begin(), SubLoops.end(), OldChild); + assert(I != SubLoops.end() && "OldChild not in loop!"); + *I = NewChild; + OldChild->ParentLoop = 0; + NewChild->ParentLoop = static_cast<LoopT *>(this); +} + +/// verifyLoop - Verify loop structure +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::verifyLoop() const { +#ifndef NDEBUG + assert(!Blocks.empty() && "Loop header is missing"); + + // Setup for using a depth-first iterator to visit every block in the loop. + SmallVector<BlockT*, 8> ExitBBs; + getExitBlocks(ExitBBs); + llvm::SmallPtrSet<BlockT*, 8> VisitSet; + VisitSet.insert(ExitBBs.begin(), ExitBBs.end()); + df_ext_iterator<BlockT*, llvm::SmallPtrSet<BlockT*, 8> > + BI = df_ext_begin(getHeader(), VisitSet), + BE = df_ext_end(getHeader(), VisitSet); + + // Keep track of the number of BBs visited. + unsigned NumVisited = 0; + + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + // Check the individual blocks. 
+ for ( ; BI != BE; ++BI) { + BlockT *BB = *BI; + bool HasInsideLoopSuccs = false; + bool HasInsideLoopPreds = false; + SmallVector<BlockT *, 2> OutsideLoopPreds; + + typedef GraphTraits<BlockT*> BlockTraits; + for (typename BlockTraits::ChildIteratorType SI = + BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB); + SI != SE; ++SI) + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *SI)) { + HasInsideLoopSuccs = true; + break; + } + typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); + PI != PE; ++PI) { + BlockT *N = *PI; + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N)) + HasInsideLoopPreds = true; + else + OutsideLoopPreds.push_back(N); + } + + if (BB == getHeader()) { + assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); + } else if (!OutsideLoopPreds.empty()) { + // A non-header loop shouldn't be reachable from outside the loop, + // though it is permitted if the predecessor is not itself actually + // reachable. + BlockT *EntryBB = BB->getParent()->begin(); + for (df_iterator<BlockT *> NI = df_begin(EntryBB), + NE = df_end(EntryBB); NI != NE; ++NI) + for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) + assert(*NI != OutsideLoopPreds[i] && + "Loop has multiple entry points!"); + } + assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); + assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); + assert(BB != getHeader()->getParent()->begin() && + "Loop contains function entry block!"); + + NumVisited++; + } + + assert(NumVisited == getNumBlocks() && "Unreachable block in loop"); + + // Check the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + // Each block in each subloop should be contained within this loop. 
+ for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); + BI != BE; ++BI) { + assert(std::binary_search(LoopBBs.begin(), LoopBBs.end(), *BI) && + "Loop does not contain all the blocks of a subloop!"); + } + + // Check the parent loop pointer. + if (ParentLoop) { + assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) != + ParentLoop->end() && + "Loop is not a subloop of its parent!"); + } +#endif +} + +/// verifyLoop - Verify loop structure of this loop and all nested loops. +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::verifyLoopNest( + DenseSet<const LoopT*> *Loops) const { + Loops->insert(static_cast<const LoopT *>(this)); + // Verify this loop. + verifyLoop(); + // Verify the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->verifyLoopNest(Loops); +} + +template<class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth) const { + OS.indent(Depth*2) << "Loop at depth " << getLoopDepth() + << " containing: "; + + for (unsigned i = 0; i < getBlocks().size(); ++i) { + if (i) OS << ","; + BlockT *BB = getBlocks()[i]; + WriteAsOperand(OS, BB, false); + if (BB == getHeader()) OS << "<header>"; + if (BB == getLoopLatch()) OS << "<latch>"; + if (isLoopExiting(BB)) OS << "<exiting>"; + } + OS << "\n"; + + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->print(OS, Depth+2); +} + +//===----------------------------------------------------------------------===// +/// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the +/// result does / not depend on use list (block predecessor) order. +/// + +/// Discover a subloop with the specified backedges such that: All blocks within +/// this loop are mapped to this loop or a subloop. And all subloops within this +/// loop have their parent loop set to this loop or a subloop. 
+template<class BlockT, class LoopT> +static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT*> Backedges, + LoopInfoBase<BlockT, LoopT> *LI, + DominatorTreeBase<BlockT> &DomTree) { + typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; + + unsigned NumBlocks = 0; + unsigned NumSubloops = 0; + + // Perform a backward CFG traversal using a worklist. + std::vector<BlockT *> ReverseCFGWorklist(Backedges.begin(), Backedges.end()); + while (!ReverseCFGWorklist.empty()) { + BlockT *PredBB = ReverseCFGWorklist.back(); + ReverseCFGWorklist.pop_back(); + + LoopT *Subloop = LI->getLoopFor(PredBB); + if (!Subloop) { + if (!DomTree.isReachableFromEntry(PredBB)) + continue; + + // This is an undiscovered block. Map it to the current loop. + LI->changeLoopFor(PredBB, L); + ++NumBlocks; + if (PredBB == L->getHeader()) + continue; + // Push all block predecessors on the worklist. + ReverseCFGWorklist.insert(ReverseCFGWorklist.end(), + InvBlockTraits::child_begin(PredBB), + InvBlockTraits::child_end(PredBB)); + } + else { + // This is a discovered block. Find its outermost discovered loop. + while (LoopT *Parent = Subloop->getParentLoop()) + Subloop = Parent; + + // If it is already discovered to be a subloop of this loop, continue. + if (Subloop == L) + continue; + + // Discover a subloop of this loop. + Subloop->setParentLoop(L); + ++NumSubloops; + NumBlocks += Subloop->getBlocks().capacity(); + PredBB = Subloop->getHeader(); + // Continue traversal along predecessors that are not loop-back edges from + // within this subloop tree itself. Note that a predecessor may directly + // reach another subloop that is not yet discovered to be a subloop of + // this loop, which we must traverse. 
+ for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(PredBB), + PE = InvBlockTraits::child_end(PredBB); PI != PE; ++PI) { + if (LI->getLoopFor(*PI) != Subloop) + ReverseCFGWorklist.push_back(*PI); + } + } + } + L->getSubLoopsVector().reserve(NumSubloops); + L->getBlocksVector().reserve(NumBlocks); +} + +namespace { +/// Populate all loop data in a stable order during a single forward DFS. +template<class BlockT, class LoopT> +class PopulateLoopsDFS { + typedef GraphTraits<BlockT*> BlockTraits; + typedef typename BlockTraits::ChildIteratorType SuccIterTy; + + LoopInfoBase<BlockT, LoopT> *LI; + DenseSet<const BlockT *> VisitedBlocks; + std::vector<std::pair<BlockT*, SuccIterTy> > DFSStack; + +public: + PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li): + LI(li) {} + + void traverse(BlockT *EntryBlock); + +protected: + void insertIntoLoop(BlockT *Block); + + BlockT *dfsSource() { return DFSStack.back().first; } + SuccIterTy &dfsSucc() { return DFSStack.back().second; } + SuccIterTy dfsSuccEnd() { return BlockTraits::child_end(dfsSource()); } + + void pushBlock(BlockT *Block) { + DFSStack.push_back(std::make_pair(Block, BlockTraits::child_begin(Block))); + } +}; +} // anonymous + +/// Top-level driver for the forward DFS within the loop. +template<class BlockT, class LoopT> +void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) { + pushBlock(EntryBlock); + VisitedBlocks.insert(EntryBlock); + while (!DFSStack.empty()) { + // Traverse the leftmost path as far as possible. + while (dfsSucc() != dfsSuccEnd()) { + BlockT *BB = *dfsSucc(); + ++dfsSucc(); + if (!VisitedBlocks.insert(BB).second) + continue; + + // Push the next DFS successor onto the stack. + pushBlock(BB); + } + // Visit the top of the stack in postorder and backtrack. + insertIntoLoop(dfsSource()); + DFSStack.pop_back(); + } +} + +/// Add a single Block to its ancestor loops in PostOrder. 
If the block is a +/// subloop header, add the subloop to its parent in PostOrder, then reverse the +/// Block and Subloop vectors of the now complete subloop to achieve RPO. +template<class BlockT, class LoopT> +void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) { + LoopT *Subloop = LI->getLoopFor(Block); + if (Subloop && Block == Subloop->getHeader()) { + // We reach this point once per subloop after processing all the blocks in + // the subloop. + if (Subloop->getParentLoop()) + Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop); + else + LI->addTopLevelLoop(Subloop); + + // For convenience, Blocks and Subloops are inserted in postorder. Reverse + // the lists, except for the loop header, which is always at the beginning. + std::reverse(Subloop->getBlocksVector().begin()+1, + Subloop->getBlocksVector().end()); + std::reverse(Subloop->getSubLoopsVector().begin(), + Subloop->getSubLoopsVector().end()); + + Subloop = Subloop->getParentLoop(); + } + for (; Subloop; Subloop = Subloop->getParentLoop()) + Subloop->getBlocksVector().push_back(Block); +} + +/// Analyze LoopInfo discovers loops during a postorder DominatorTree traversal +/// interleaved with backward CFG traversals within each subloop +/// (discoverAndMapSubloop). The backward traversal skips inner subloops, so +/// this part of the algorithm is linear in the number of CFG edges. Subloop and +/// Block vectors are then populated during a single forward CFG traversal +/// (PopulateLoopDFS). +/// +/// During the two CFG traversals each block is seen three times: +/// 1) Discovered and mapped by a reverse CFG traversal. +/// 2) Visited during a forward DFS CFG traversal. +/// 3) Reverse-inserted in the loop in postorder following forward DFS. +/// +/// The Block vectors are inclusive, so step 3 requires loop-depth number of +/// insertions per block. 
+template<class BlockT, class LoopT> +void LoopInfoBase<BlockT, LoopT>:: +Analyze(DominatorTreeBase<BlockT> &DomTree) { + + // Postorder traversal of the dominator tree. + DomTreeNodeBase<BlockT>* DomRoot = DomTree.getRootNode(); + for (po_iterator<DomTreeNodeBase<BlockT>*> DomIter = po_begin(DomRoot), + DomEnd = po_end(DomRoot); DomIter != DomEnd; ++DomIter) { + + BlockT *Header = DomIter->getBlock(); + SmallVector<BlockT *, 4> Backedges; + + // Check each predecessor of the potential loop header. + typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(Header), + PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { + + BlockT *Backedge = *PI; + + // If Header dominates predBB, this is a new loop. Collect the backedges. + if (DomTree.dominates(Header, Backedge) + && DomTree.isReachableFromEntry(Backedge)) { + Backedges.push_back(Backedge); + } + } + // Perform a backward CFG traversal to discover and map blocks in this loop. + if (!Backedges.empty()) { + LoopT *L = new LoopT(Header); + discoverAndMapSubloop(L, ArrayRef<BlockT*>(Backedges), this, DomTree); + } + } + // Perform a single forward CFG traversal to populate block and subloop + // vectors for all loops. 
+ PopulateLoopsDFS<BlockT, LoopT> DFS(this); + DFS.traverse(DomRoot->getBlock()); +} + +// Debugging +template<class BlockT, class LoopT> +void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const { + for (unsigned i = 0; i < TopLevelLoops.size(); ++i) + TopLevelLoops[i]->print(OS); +#if 0 + for (DenseMap<BasicBlock*, LoopT*>::const_iterator I = BBMap.begin(), + E = BBMap.end(); I != E; ++I) + OS << "BB '" << I->first->getName() << "' level = " + << I->second->getLoopDepth() << "\n"; +#endif +} + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index e7dcbf3cfc..2dcd9fe087 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -15,6 +15,14 @@ #ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H #define LLVM_ANALYSIS_MEMORYBUILTINS_H +#include "llvm/IRBuilder.h" +#include "llvm/Operator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/TargetFolder.h" + namespace llvm { class CallInst; class PointerType; @@ -22,24 +30,44 @@ class TargetData; class Type; class Value; + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup +/// like). +bool isAllocationFn(const Value *V, bool LookThroughBitCast = false); + +/// \brief Tests if a value is a call or invoke to a function that returns a +/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). +bool isNoAliasFn(const Value *V, bool LookThroughBitCast = false); + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory (such as malloc). 
+bool isMallocLikeFn(const Value *V, bool LookThroughBitCast = false); + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates zero-filled memory (such as calloc). +bool isCallocLikeFn(const Value *V, bool LookThroughBitCast = false); + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory (either malloc, calloc, or strdup like). +bool isAllocLikeFn(const Value *V, bool LookThroughBitCast = false); + +/// \brief Tests if a value is a call or invoke to a library function that +/// reallocates memory (such as realloc). +bool isReallocLikeFn(const Value *V, bool LookThroughBitCast = false); + + //===----------------------------------------------------------------------===// // malloc Call Utility Functions. // -/// isMalloc - Returns true if the value is either a malloc call or a bitcast of -/// the result of a malloc call -bool isMalloc(const Value *I); - /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. const CallInst *extractMallocCall(const Value *I); -CallInst *extractMallocCall(Value *I); - -/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the -/// instruction is a bitcast of the result of a malloc call. -const CallInst *extractMallocCallFromBitCast(const Value *I); -CallInst *extractMallocCallFromBitCast(Value *I); +static inline CallInst *extractMallocCall(Value *I) { + return const_cast<CallInst*>(extractMallocCall((const Value*)I)); +} /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size @@ -67,7 +95,7 @@ Type *getMallocAllocatedType(const CallInst *CI); /// determined. 
Value *getMallocArraySize(CallInst *CI, const TargetData *TD, bool LookThroughSExt = false); - + //===----------------------------------------------------------------------===// // calloc Call Utility Functions. @@ -76,7 +104,9 @@ Value *getMallocArraySize(CallInst *CI, const TargetData *TD, /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. const CallInst *extractCallocCall(const Value *I); -CallInst *extractCallocCall(Value *I); +static inline CallInst *extractCallocCall(Value *I) { + return const_cast<CallInst*>(extractCallocCall((const Value*)I)); +} //===----------------------------------------------------------------------===// @@ -90,6 +120,130 @@ static inline CallInst *isFreeCall(Value *I) { return const_cast<CallInst*>(isFreeCall((const Value*)I)); } + +//===----------------------------------------------------------------------===// +// Utility functions to compute size of objects. +// + +/// \brief Compute the size of the object pointed by Ptr. Returns true and the +/// object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, + bool RoundToAlign = false); + + + +typedef std::pair<APInt, APInt> SizeOffsetType; + +/// \brief Evaluate the size and offset of an object ponted by a Value* +/// statically. Fails if size or offset are not known at compile time. 
+class ObjectSizeOffsetVisitor + : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> { + + const TargetData *TD; + bool RoundToAlign; + unsigned IntTyBits; + APInt Zero; + + APInt align(APInt Size, uint64_t Align); + + SizeOffsetType unknown() { + return std::make_pair(APInt(), APInt()); + } + +public: + ObjectSizeOffsetVisitor(const TargetData *TD, LLVMContext &Context, + bool RoundToAlign = false); + + SizeOffsetType compute(Value *V); + + bool knownSize(SizeOffsetType &SizeOffset) { + return SizeOffset.first.getBitWidth() > 1; + } + + bool knownOffset(SizeOffsetType &SizeOffset) { + return SizeOffset.second.getBitWidth() > 1; + } + + bool bothKnown(SizeOffsetType &SizeOffset) { + return knownSize(SizeOffset) && knownOffset(SizeOffset); + } + + SizeOffsetType visitAllocaInst(AllocaInst &I); + SizeOffsetType visitArgument(Argument &A); + SizeOffsetType visitCallSite(CallSite CS); + SizeOffsetType visitConstantPointerNull(ConstantPointerNull&); + SizeOffsetType visitExtractElementInst(ExtractElementInst &I); + SizeOffsetType visitExtractValueInst(ExtractValueInst &I); + SizeOffsetType visitGEPOperator(GEPOperator &GEP); + SizeOffsetType visitGlobalVariable(GlobalVariable &GV); + SizeOffsetType visitIntToPtrInst(IntToPtrInst&); + SizeOffsetType visitLoadInst(LoadInst &I); + SizeOffsetType visitPHINode(PHINode&); + SizeOffsetType visitSelectInst(SelectInst &I); + SizeOffsetType visitUndefValue(UndefValue&); + SizeOffsetType visitInstruction(Instruction &I); +}; + +typedef std::pair<Value*, Value*> SizeOffsetEvalType; + + +/// \brief Evaluate the size and offset of an object ponted by a Value*. +/// May create code to compute the result at run-time. 
+class ObjectSizeOffsetEvaluator + : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> { + + typedef IRBuilder<true, TargetFolder> BuilderTy; + typedef DenseMap<const Value*, SizeOffsetEvalType> CacheMapTy; + typedef SmallPtrSet<const Value*, 8> PtrSetTy; + + const TargetData *TD; + LLVMContext &Context; + BuilderTy Builder; + ObjectSizeOffsetVisitor Visitor; + IntegerType *IntTy; + Value *Zero; + CacheMapTy CacheMap; + PtrSetTy SeenVals; + + SizeOffsetEvalType unknown() { + return std::make_pair((Value*)0, (Value*)0); + } + SizeOffsetEvalType compute_(Value *V); + +public: + ObjectSizeOffsetEvaluator(const TargetData *TD, LLVMContext &Context); + SizeOffsetEvalType compute(Value *V); + + bool knownSize(SizeOffsetEvalType &SizeOffset) { + return SizeOffset.first; + } + + bool knownOffset(SizeOffsetEvalType &SizeOffset) { + return SizeOffset.second; + } + + bool anyKnown(SizeOffsetEvalType &SizeOffset) { + return knownSize(SizeOffset) || knownOffset(SizeOffset); + } + + bool bothKnown(SizeOffsetEvalType &SizeOffset) { + return knownSize(SizeOffset) && knownOffset(SizeOffset); + } + + SizeOffsetEvalType visitAllocaInst(AllocaInst &I); + SizeOffsetEvalType visitCallSite(CallSite CS); + SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I); + SizeOffsetEvalType visitExtractValueInst(ExtractValueInst &I); + SizeOffsetEvalType visitGEPOperator(GEPOperator &GEP); + SizeOffsetEvalType visitIntToPtrInst(IntToPtrInst&); + SizeOffsetEvalType visitLoadInst(LoadInst &I); + SizeOffsetEvalType visitPHINode(PHINode &PHI); + SizeOffsetEvalType visitSelectInst(SelectInst &I); + SizeOffsetEvalType visitInstruction(Instruction &I); +}; + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index c22fc3ab74..cbbe4295b0 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -14,9 +14,9 @@ #ifndef 
LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H +#include "llvm/IRBuilder.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Support/ValueHandle.h" #include <set> diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index cc2b473f2c..dd96b043fc 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -71,8 +71,8 @@ namespace llvm { /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. /// - static inline bool isBitcodeWrapper(const unsigned char *BufPtr, - const unsigned char *BufEnd) { + inline bool isBitcodeWrapper(const unsigned char *BufPtr, + const unsigned char *BufEnd) { // See if you can find the hidden message in the magic bytes :-). // (Hint: it's a little-endian encoding.) return BufPtr != BufEnd && @@ -85,8 +85,8 @@ namespace llvm { /// isRawBitcode - Return true if the given bytes are the magic bytes for /// raw LLVM IR bitcode (without a wrapper). /// - static inline bool isRawBitcode(const unsigned char *BufPtr, - const unsigned char *BufEnd) { + inline bool isRawBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { // These bytes sort of have a hidden message, but it's not in // little-endian this time, and it's a little redundant. return BufPtr != BufEnd && @@ -99,8 +99,8 @@ namespace llvm { /// isBitcode - Return true if the given bytes are the magic bytes for /// LLVM IR bitcode, either with or without a wrapper. /// - static bool inline isBitcode(const unsigned char *BufPtr, - const unsigned char *BufEnd) { + inline bool isBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { return isBitcodeWrapper(BufPtr, BufEnd) || isRawBitcode(BufPtr, BufEnd); } @@ -121,9 +121,9 @@ namespace llvm { /// BC file. 
/// If 'VerifyBufferSize' is true, check that the buffer is large enough to /// contain the whole bitcode file. - static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, - const unsigned char *&BufEnd, - bool VerifyBufferSize) { + inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, + const unsigned char *&BufEnd, + bool VerifyBufferSize) { enum { KnownHeaderSize = 4*4, // Size of header we read. OffsetField = 2*4, // Offset in bytes to Offset field. diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 2e0184a61d..ad214ccb07 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -377,6 +377,13 @@ namespace llvm { void EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size) const; + /// EmitLabelReference - Emit something like ".long Label" + /// where the size in bytes of the directive is specified by Size and Label + /// specifies the label. + void EmitLabelReference(const MCSymbol *Label, unsigned Size) const { + EmitLabelPlusOffset(Label, 0, Size); + } + //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index 2b6c35b82f..20e33f74f6 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -71,7 +71,8 @@ namespace llvm { struct GCRoot { int Num; ///< Usually a frame index. int StackOffset; ///< Offset from the stack pointer. - const Constant *Metadata;//< Metadata straight from the call to llvm.gcroot. + const Constant *Metadata; ///< Metadata straight from the call + ///< to llvm.gcroot. 
GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} }; diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 4d093c4cff..409e328f7f 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -585,8 +585,8 @@ namespace ISD { // DEBUGTRAP - Trap intended to get the attention of a debugger. DEBUGTRAP, - // PREFETCH - This corresponds to a prefetch intrinsic. It takes chains are - // their first operand. The other operands are the address to prefetch, + // PREFETCH - This corresponds to a prefetch intrinsic. The first operand + // is the chain. The other operands are the address to prefetch, // read / write specifier, locality specifier and instruction / data cache // specifier. PREFETCH, diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index 5be102f0b3..e1911cfd82 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -158,7 +158,10 @@ class LexicalScope { public: LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), - LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0), IndentLevel(0) { + LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) { +#ifndef NDEBUG + IndentLevel = 0; +#endif // @LOCALMOD-BEGIN -- Hack for bug // http://code.google.com/p/nativeclient/issues/detail?id=2786 Desc.make_weak(); @@ -247,7 +250,9 @@ private: const MachineInstr *FirstInsn; // First instruction of this scope. unsigned DFSIn, DFSOut; // In & Out Depth use to determine // scope nesting. 
+#ifndef NDEBUG mutable unsigned IndentLevel; // Private state for dump() +#endif }; } // end llvm namespace diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index bd3604c429..a344b1ff1b 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -20,12 +20,13 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" @@ -61,8 +62,8 @@ namespace llvm { /// VNInfo::Allocator VNInfoAllocator; - typedef DenseMap<unsigned, LiveInterval*> Reg2IntervalMap; - Reg2IntervalMap R2IMap; + /// Live interval pointers for all the virtual registers. + IndexedMap<LiveInterval*, VirtReg2IndexFunctor> VirtRegIntervals; /// AllocatableRegs - A bit vector of allocatable registers. BitVector AllocatableRegs; @@ -108,28 +109,18 @@ namespace llvm { // Calculate the spill weight to assign to a single instruction. 
static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth); - typedef Reg2IntervalMap::iterator iterator; - typedef Reg2IntervalMap::const_iterator const_iterator; - const_iterator begin() const { return R2IMap.begin(); } - const_iterator end() const { return R2IMap.end(); } - iterator begin() { return R2IMap.begin(); } - iterator end() { return R2IMap.end(); } - unsigned getNumIntervals() const { return (unsigned)R2IMap.size(); } - - LiveInterval &getInterval(unsigned reg) { - Reg2IntervalMap::iterator I = R2IMap.find(reg); - assert(I != R2IMap.end() && "Interval does not exist for register"); - return *I->second; + LiveInterval &getInterval(unsigned Reg) { + LiveInterval *LI = VirtRegIntervals[Reg]; + assert(LI && "Interval does not exist for virtual register"); + return *LI; } - const LiveInterval &getInterval(unsigned reg) const { - Reg2IntervalMap::const_iterator I = R2IMap.find(reg); - assert(I != R2IMap.end() && "Interval does not exist for register"); - return *I->second; + const LiveInterval &getInterval(unsigned Reg) const { + return const_cast<LiveIntervals*>(this)->getInterval(Reg); } - bool hasInterval(unsigned reg) const { - return R2IMap.count(reg); + bool hasInterval(unsigned Reg) const { + return VirtRegIntervals.inBounds(Reg) && VirtRegIntervals[Reg]; } /// isAllocatable - is the physical register reg allocatable in the current @@ -144,12 +135,19 @@ namespace llvm { return ReservedRegs.test(reg); } - // Interval creation - LiveInterval &getOrCreateInterval(unsigned reg) { - Reg2IntervalMap::iterator I = R2IMap.find(reg); - if (I == R2IMap.end()) - I = R2IMap.insert(std::make_pair(reg, createInterval(reg))).first; - return *I->second; + // Interval creation. + LiveInterval &getOrCreateInterval(unsigned Reg) { + if (!hasInterval(Reg)) { + VirtRegIntervals.grow(Reg); + VirtRegIntervals[Reg] = createInterval(Reg); + } + return getInterval(Reg); + } + + // Interval removal. 
+ void removeInterval(unsigned Reg) { + delete VirtRegIntervals[Reg]; + VirtRegIntervals[Reg] = 0; } /// addLiveRangeToEndOfBlock - Given a register and an instruction, @@ -167,14 +165,6 @@ namespace llvm { bool shrinkToUses(LiveInterval *li, SmallVectorImpl<MachineInstr*> *dead = 0); - // Interval removal - - void removeInterval(unsigned Reg) { - DenseMap<unsigned, LiveInterval*>::iterator I = R2IMap.find(Reg); - delete I->second; - R2IMap.erase(I); - } - SlotIndexes *getSlotIndexes() const { return Indexes; } @@ -347,18 +337,18 @@ namespace llvm { return *LI; } - /// trackingRegUnits - Does LiveIntervals curently track register units? - /// This function will be removed when regunit tracking is permanently - /// enabled. - bool trackingRegUnits() const { return !RegUnitIntervals.empty(); } + /// getCachedRegUnit - Return the live range for Unit if it has already + /// been computed, or NULL if it hasn't been computed yet. + LiveInterval *getCachedRegUnit(unsigned Unit) { + return RegUnitIntervals[Unit]; + } private: /// computeIntervals - Compute live intervals. void computeIntervals(); /// handleRegisterDef - update intervals for a register def - /// (calls handlePhysicalRegisterDef and - /// handleVirtualRegisterDef) + /// (calls handleVirtualRegisterDef) void handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, SlotIndex MIIdx, @@ -378,18 +368,6 @@ namespace llvm { unsigned MOIdx, LiveInterval& interval); - /// handlePhysicalRegisterDef - update intervals for a physical register - /// def. - void handlePhysicalRegisterDef(MachineBasicBlock* mbb, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, MachineOperand& MO, - LiveInterval &interval); - - /// handleLiveInRegister - Create interval for a livein register. 
- void handleLiveInRegister(MachineBasicBlock* mbb, - SlotIndex MIIdx, - LiveInterval &interval); - static LiveInterval* createInterval(unsigned Reg); void printInstrs(raw_ostream &O) const; diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 44402a9e68..8b958e437e 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -359,7 +359,7 @@ public: assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); Objects[ObjectIdx+NumFixedObjects].Alignment = Align; - MaxAlignment = std::max(MaxAlignment, Align); + ensureMaxAlignment(Align); } /// NeedsStackProtector - Returns true if the object may need stack @@ -416,9 +416,11 @@ public: /// unsigned getMaxAlignment() const { return MaxAlignment; } - /// setMaxAlignment - Set the preferred alignment. - /// - void setMaxAlignment(unsigned Align) { MaxAlignment = Align; } + /// ensureMaxAlignment - Make sure the function is at least Align bytes + /// aligned. + void ensureMaxAlignment(unsigned Align) { + if (MaxAlignment < Align) MaxAlignment = Align; + } /// AdjustsStack - Return true if this function adjusts the stack -- e.g., /// when calling another function. 
This is only valid during and after @@ -485,7 +487,7 @@ public: Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); - MaxAlignment = std::max(MaxAlignment, Alignment); + ensureMaxAlignment(Alignment); return Index; } @@ -496,7 +498,7 @@ public: int CreateSpillStackObject(uint64_t Size, unsigned Alignment) { CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; - MaxAlignment = std::max(MaxAlignment, Alignment); + ensureMaxAlignment(Alignment); return Index; } @@ -515,7 +517,7 @@ public: int CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; Objects.push_back(StackObject(0, Alignment, 0, false, false, true)); - MaxAlignment = std::max(MaxAlignment, Alignment); + ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 0fb4969822..dc5f9a6ec8 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -43,14 +43,14 @@ bool finalizeBundles(MachineFunction &MF); /// getBundleStart - Returns the first instruction in the bundle containing MI. 
/// -static inline MachineInstr *getBundleStart(MachineInstr *MI) { +inline MachineInstr *getBundleStart(MachineInstr *MI) { MachineBasicBlock::instr_iterator I = MI; while (I->isInsideBundle()) --I; return I; } -static inline const MachineInstr *getBundleStart(const MachineInstr *MI) { +inline const MachineInstr *getBundleStart(const MachineInstr *MI) { MachineBasicBlock::const_instr_iterator I = MI; while (I->isInsideBundle()) --I; diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 6dd9440500..3e204bed15 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file defines the MachineLoopInfo class that is used to identify natural +// This file defines the MachineLoopInfo class that is used to identify natural // loops and determine the loop depth of various nodes of the CFG. Note that // natural loops may actually be several loops that share the same header node. 
// @@ -35,6 +35,12 @@ namespace llvm { +// Implementation in LoopInfoImpl.h +#ifdef __GNUC__ +class MachineLoop; +__extension__ extern template class LoopBase<MachineBasicBlock, MachineLoop>; +#endif + class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> { public: MachineLoop(); @@ -57,6 +63,12 @@ private: : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {} }; +// Implementation in LoopInfoImpl.h +#ifdef __GNUC__ +__extension__ extern template +class LoopInfoBase<MachineBasicBlock, MachineLoop>; +#endif + class MachineLoopInfo : public MachineFunctionPass { LoopInfoBase<MachineBasicBlock, MachineLoop> LI; friend class LoopBase<MachineBasicBlock, MachineLoop>; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 5a82caa9ac..2bcd1c72ce 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -237,6 +237,11 @@ public: /// form, so there should only be one definition. MachineInstr *getVRegDef(unsigned Reg) const; + /// getUniqueVRegDef - Return the unique machine instr that defines the + /// specified virtual register or null if none is found. If there are + /// multiple definitions or no definition, return null. + MachineInstr *getUniqueVRegDef(unsigned Reg) const; + /// clearKillFlags - Iterate over all the uses of the given register and /// clear the kill flag from the MachineOperand. 
This function is used by /// optimization passes which extend register lifetimes and need only diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index e5d3a98e6c..8da2045ad0 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -19,7 +19,7 @@ // createCustomMachineSched); // // Inside <Target>PassConfig: -// enablePass(MachineSchedulerID); +// enablePass(&MachineSchedulerID); // MachineSchedRegistry::setDefault(createCustomMachineSched); // //===----------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index cc3b3d7235..4a24ab0d63 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -24,6 +24,7 @@ namespace llvm { class FunctionPass; class MachineFunctionPass; class PassInfo; + class PassManagerBase; class TargetLowering; class TargetRegisterClass; class raw_ostream; @@ -31,8 +32,6 @@ namespace llvm { namespace llvm { -extern char &NoPassID; // Allow targets to choose not to run a pass. - class PassConfigImpl; /// Target-Independent Code Generator Pass Configuration Options. @@ -54,9 +53,15 @@ public: /// optimization after regalloc. static char PostRAMachineLICMID; +private: + PassManagerBase *PM; + AnalysisID StartAfter; + AnalysisID StopAfter; + bool Started; + bool Stopped; + protected: TargetMachine *TM; - PassManagerBase *PM; PassConfigImpl *Impl; // Internal data structures bool Initialized; // Flagged after all passes are configured. @@ -91,6 +96,18 @@ public: CodeGenOpt::Level getOptLevel() const { return TM->getOptLevel(); } + /// setStartStopPasses - Set the StartAfter and StopAfter passes to allow + /// running only a portion of the normal code-gen pass sequence. 
If the + /// Start pass ID is zero, then compilation will begin at the normal point; + /// otherwise, clear the Started flag to indicate that passes should not be + /// added until the starting pass is seen. If the Stop pass ID is zero, + /// then compilation will continue to the end. + void setStartStopPasses(AnalysisID Start, AnalysisID Stop) { + StartAfter = Start; + StopAfter = Stop; + Started = (StartAfter == 0); + } + void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); } bool getEnableTailMerge() const { return EnableTailMerge; } @@ -98,19 +115,19 @@ public: /// Allow the target to override a specific pass without overriding the pass /// pipeline. When passes are added to the standard pipeline at the - /// point where StadardID is expected, add TargetID in its place. - void substitutePass(char &StandardID, char &TargetID); + /// point where StandardID is expected, add TargetID in its place. + void substitutePass(AnalysisID StandardID, AnalysisID TargetID); /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(const char &TargetPassID, const char &InsertedPassID); + void insertPass(AnalysisID TargetPassID, AnalysisID InsertedPassID); /// Allow the target to enable a specific standard pass by default. - void enablePass(char &ID) { substitutePass(ID, ID); } + void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } /// Allow the target to disable a specific standard pass by default. - void disablePass(char &ID) { substitutePass(ID, NoPassID); } + void disablePass(AnalysisID PassID) { substitutePass(PassID, 0); } - /// Return the pass ssubtituted for StandardID by the target. + /// Return the pass substituted for StandardID by the target. /// If no substitution exists, return StandardID. AnalysisID getPassSubstitution(AnalysisID StandardID) const; @@ -121,6 +138,9 @@ public: /// transforms following machine independent optimization. 
virtual void addIRPasses(); + /// Add passes to lower exception handling for the code generator. + void addPassesToHandleExceptions(); + /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. virtual void addISelPrepare(); @@ -175,6 +195,18 @@ protected: /// LLVMTargetMachine provides standard regalloc passes for most targets. virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + /// addPreRewrite - Add passes to the optimized register allocation pipeline + /// after register allocation is complete, but before virtual registers are + /// rewritten to physical registers. + /// + /// These passes must preserve VirtRegMap and LiveIntervals, and when running + /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix. + /// When these passes run, VirtRegMap contains legal physreg assignments for + /// all virtual registers. + virtual bool addPreRewrite() { + return false; + } + /// addFinalizeRegAlloc - This method may be implemented by targets that want /// to run passes within the regalloc pipeline, immediately after the register /// allocation pass itself. These passes run as soon as virtual regisiters @@ -219,8 +251,12 @@ protected: /// /// Add a CodeGen pass at this point in the pipeline after checking overrides. - /// Return the pass that was added, or NoPassID. - AnalysisID addPass(char &ID); + /// Return the pass that was added, or zero if no pass was added. + AnalysisID addPass(AnalysisID PassID); + + /// Add a pass to the PassManager if that pass is supposed to be run, as + /// determined by the StartAfter and StopAfter options. + void addPass(Pass *P); /// addMachinePasses helper to create the target-selected or overriden /// regalloc pass. @@ -229,7 +265,7 @@ protected: /// printAndVerify - Add a pass to dump then verify the machine function, if /// those steps are enabled. 
/// - void printAndVerify(const char *Banner) const; + void printAndVerify(const char *Banner); }; } // namespace llvm diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h deleted file mode 100644 index 6ab57f03ae..0000000000 --- a/include/llvm/CodeGen/ProcessImplicitDefs.h +++ /dev/null @@ -1,51 +0,0 @@ -//===-------------- llvm/CodeGen/ProcessImplicitDefs.h ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PROCESSIMPLICITDEFS_H -#define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/SmallSet.h" - -namespace llvm { - - class MachineInstr; - class TargetInstrInfo; - class TargetRegisterInfo; - class MachineRegisterInfo; - class LiveVariables; - - /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def - /// for each use. Add isUndef marker to implicit_def defs and their uses. 
- class ProcessImplicitDefs : public MachineFunctionPass { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - LiveVariables *LV; - - bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, - unsigned OpIdx, - SmallSet<unsigned, 8> &ImpDefRegs); - - public: - static char ID; - - ProcessImplicitDefs() : MachineFunctionPass(ID) { - initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - }; - -} - -#endif // LLVM_CODEGEN_PROCESSIMPLICITDEFS_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 5a4213625b..9849e92f7d 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -33,6 +33,8 @@ namespace llvm { class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { + bool UseInitArray; + public: virtual ~TargetLoweringObjectFileELF() {} @@ -66,6 +68,7 @@ public: getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const; + void InitializeELF(bool UseInitArray_); virtual const MCSection * getStaticCtorSection(unsigned Priority = 65535) const; virtual const MCSection * diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/DIBuilder.h index 35fd0d089a..596eb0605d 100644 --- a/include/llvm/Analysis/DIBuilder.h +++ b/include/llvm/DIBuilder.h @@ -1,4 +1,4 @@ -//===--- llvm/Analysis/DIBuilder.h - Debug Information Builder --*- C++ -*-===// +//===--- llvm/DIBuilder.h - Debug Information Builder -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/DebugInfo.h index eb90ca4ae2..fdbafd69f2 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/DebugInfo.h @@ -104,12 +104,6 @@ namespace llvm { return getUnsignedField(0) & 
~LLVMDebugVersionMask; } - /// print - print descriptor. - void print(raw_ostream &OS) const; - - /// dump - print descriptor to dbgs() with a newline. - void dump() const; - bool isDerivedType() const; bool isCompositeType() const; bool isBasicType() const; @@ -130,10 +124,18 @@ namespace llvm { bool isTemplateTypeParameter() const; bool isTemplateValueParameter() const; bool isObjCProperty() const; + + /// print - print descriptor. + void print(raw_ostream &OS) const; + + /// dump - print descriptor to dbgs() with a newline. + void dump() const; }; /// DISubrange - This is used to represent ranges, for array bounds. class DISubrange : public DIDescriptor { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {} @@ -155,10 +157,11 @@ namespace llvm { /// DIScope - A base class for various scopes. class DIScope : public DIDescriptor { - virtual void anchor(); + protected: + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DIScope(const MDNode *N = 0) : DIDescriptor (N) {} - virtual ~DIScope() {} StringRef getFilename() const; StringRef getDirectory() const; @@ -166,7 +169,8 @@ namespace llvm { /// DICompileUnit - A wrapper for a compile unit. class DICompileUnit : public DIScope { - virtual void anchor(); + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {} @@ -196,17 +200,12 @@ namespace llvm { /// Verify - Verify that a compile unit is well formed. bool Verify() const; - - /// print - print compile unit. - void print(raw_ostream &OS) const; - - /// dump - print compile unit to dbgs() with a newline. - void dump() const; }; /// DIFile - This is a wrapper for a file. class DIFile : public DIScope { - virtual void anchor(); + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const {} // FIXME: Output something? 
public: explicit DIFile(const MDNode *N = 0) : DIScope(N) { if (DbgNode && !isFile()) @@ -224,6 +223,8 @@ namespace llvm { /// FIXME: it seems strange that this doesn't have either a reference to the /// type/precision or a file/line pair for location info. class DIEnumerator : public DIDescriptor { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DIEnumerator(const MDNode *N = 0) : DIDescriptor(N) {} @@ -235,19 +236,17 @@ namespace llvm { /// FIXME: Types should be factored much better so that CV qualifiers and /// others do not require a huge and empty descriptor full of zeros. class DIType : public DIScope { - virtual void anchor(); protected: + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; // This ctor is used when the Tag has already been validated by a derived // ctor. DIType(const MDNode *N, bool, bool) : DIScope(N) {} - public: - /// Verify - Verify that a type descriptor is well formed. bool Verify() const; explicit DIType(const MDNode *N); explicit DIType() {} - virtual ~DIType() {} DIScope getContext() const { return getFieldAs<DIScope>(1); } StringRef getName() const { return getStringField(2); } @@ -314,17 +313,10 @@ namespace llvm { /// this descriptor. void replaceAllUsesWith(DIDescriptor &D); void replaceAllUsesWith(MDNode *D); - - /// print - print type. - void print(raw_ostream &OS) const; - - /// dump - print type to dbgs() with a newline. - void dump() const; }; /// DIBasicType - A basic type, like 'int' or 'float'. class DIBasicType : public DIType { - virtual void anchor(); public: explicit DIBasicType(const MDNode *N = 0) : DIType(N) {} @@ -332,18 +324,13 @@ namespace llvm { /// Verify - Verify that a basic type descriptor is well formed. bool Verify() const; - - /// print - print basic type. - void print(raw_ostream &OS) const; - - /// dump - print basic type to dbgs() with a newline. 
- void dump() const; }; /// DIDerivedType - A simple derived type, like a const qualified type, /// a typedef, a pointer or reference, etc. class DIDerivedType : public DIType { - virtual void anchor(); + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; protected: explicit DIDerivedType(const MDNode *N, bool, bool) : DIType(N, true, true) {} @@ -401,19 +388,14 @@ namespace llvm { /// Verify - Verify that a derived type descriptor is well formed. bool Verify() const; - - /// print - print derived type. - void print(raw_ostream &OS) const; - - /// dump - print derived type to dbgs() with a newline. - void dump() const; }; /// DICompositeType - This descriptor holds a type that can refer to multiple /// other types, like a function or struct. /// FIXME: Why is this a DIDerivedType?? class DICompositeType : public DIDerivedType { - virtual void anchor(); + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DICompositeType(const MDNode *N = 0) : DIDerivedType(N, true, true) { @@ -430,12 +412,6 @@ namespace llvm { /// Verify - Verify that a composite type descriptor is well formed. bool Verify() const; - - /// print - print composite type. - void print(raw_ostream &OS) const; - - /// dump - print composite type to dbgs() with a newline. - void dump() const; }; /// DITemplateTypeParameter - This is a wrapper for template type parameter. @@ -477,7 +453,8 @@ namespace llvm { /// DISubprogram - This is a wrapper for a subprogram (e.g. a function). class DISubprogram : public DIScope { - virtual void anchor(); + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DISubprogram(const MDNode *N = 0) : DIScope(N) {} @@ -576,12 +553,6 @@ namespace llvm { /// Verify - Verify that a subprogram descriptor is well formed. bool Verify() const; - /// print - print subprogram. - void print(raw_ostream &OS) const; - - /// dump - print subprogram to dbgs() with a newline. 
- void dump() const; - /// describes - Return true if this subprogram provides debugging /// information for the function F. bool describes(const Function *F); @@ -597,6 +568,8 @@ namespace llvm { /// DIGlobalVariable - This is a wrapper for a global variable. class DIGlobalVariable : public DIDescriptor { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DIGlobalVariable(const MDNode *N = 0) : DIDescriptor(N) {} @@ -634,17 +607,13 @@ namespace llvm { /// Verify - Verify that a global variable descriptor is well formed. bool Verify() const; - - /// print - print global variable. - void print(raw_ostream &OS) const; - - /// dump - print global variable to dbgs() with a newline. - void dump() const; }; /// DIVariable - This is a wrapper for a variable (e.g. parameter, local, /// global etc). class DIVariable : public DIDescriptor { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DIVariable(const MDNode *N = 0) : DIDescriptor(N) {} @@ -706,18 +675,11 @@ namespace llvm { /// information for an inlined function arguments. bool isInlinedFnArgument(const Function *CurFn); - /// print - print variable. - void print(raw_ostream &OS) const; - void printExtendedName(raw_ostream &OS) const; - - /// dump - print variable to dbgs() with a newline. - void dump() const; }; /// DILexicalBlock - This is a wrapper for a lexical block. class DILexicalBlock : public DIScope { - virtual void anchor(); public: explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getFieldAs<DIScope>(1); } @@ -736,7 +698,6 @@ namespace llvm { /// DILexicalBlockFile - This is a wrapper for a lexical block with /// a filename change. 
class DILexicalBlockFile : public DIScope { - virtual void anchor(); public: explicit DILexicalBlockFile(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getScope().getContext(); } @@ -756,7 +717,6 @@ namespace llvm { /// DINameSpace - A wrapper for a C++ style name space. class DINameSpace : public DIScope { - virtual void anchor(); public: explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {} DIScope getContext() const { return getFieldAs<DIScope>(1); } @@ -830,12 +790,6 @@ namespace llvm { /// Verify - Verify that a derived type descriptor is well formed. bool Verify() const; - - /// print - print derived type. - void print(raw_ostream &OS) const; - - /// dump - print derived type to dbgs() with a newline. - void dump() const; }; /// getDISubprogram - Find subprogram that is enclosing this scope. diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 64f80c5065..6377acb634 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -26,26 +26,49 @@ class raw_ostream; /// DILineInfo - a format-neutral container for source line information. 
class DILineInfo { const char *FileName; + const char *FunctionName; uint32_t Line; uint32_t Column; public: - DILineInfo() : FileName("<invalid>"), Line(0), Column(0) {} - DILineInfo(const char *fileName, uint32_t line, uint32_t column) - : FileName(fileName), Line(line), Column(column) {} + DILineInfo() + : FileName("<invalid>"), FunctionName("<invalid>"), + Line(0), Column(0) {} + DILineInfo(const char *fileName, const char *functionName, + uint32_t line, uint32_t column) + : FileName(fileName), FunctionName(functionName), + Line(line), Column(column) {} const char *getFileName() const { return FileName; } + const char *getFunctionName() const { return FunctionName; } uint32_t getLine() const { return Line; } uint32_t getColumn() const { return Column; } bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && - std::strcmp(FileName, RHS.FileName) == 0; + std::strcmp(FileName, RHS.FileName) == 0 && + std::strcmp(FunctionName, RHS.FunctionName) == 0; } bool operator!=(const DILineInfo &RHS) const { return !(*this == RHS); } }; +/// DILineInfoSpecifier - controls which fields of DILineInfo container +/// should be filled with data. +class DILineInfoSpecifier { + const uint32_t Flags; // Or'ed flags that set the info we want to fetch. +public: + enum Specification { + FileLineInfo = 1 << 0, + FunctionName = 1 << 1 + }; + // Use file/line info by default. 
+ DILineInfoSpecifier(uint32_t flags = FileLineInfo) : Flags(flags) {} + bool needs(Specification spec) const { + return (Flags & spec) > 0; + } +}; + class DIContext { public: virtual ~DIContext(); @@ -60,7 +83,8 @@ public: virtual void dump(raw_ostream &OS) = 0; - virtual DILineInfo getLineInfoForAddress(uint64_t address) = 0; + virtual DILineInfo getLineInfoForAddress(uint64_t address, + DILineInfoSpecifier specifier = DILineInfoSpecifier()) = 0; }; } diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h index fbc2798684..9d5773ec85 100644 --- a/include/llvm/GlobalValue.h +++ b/include/llvm/GlobalValue.h @@ -184,6 +184,12 @@ public: return Linkage == CommonLinkage; } + /// isDiscardableIfUnused - Whether the definition of this global may be + /// discarded if it is not used in its compilation unit. + static bool isDiscardableIfUnused(LinkageTypes Linkage) { + return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage); + } + /// mayBeOverridden - Whether the definition of this global may be replaced /// by something non-equivalent at link time. For example, if a function has /// weak linkage then the code defining it may be replaced by different code. @@ -241,6 +247,10 @@ public: void setLinkage(LinkageTypes LT) { Linkage = LT; } LinkageTypes getLinkage() const { return Linkage; } + bool isDiscardableIfUnused() const { + return isDiscardableIfUnused(Linkage); + } + bool mayBeOverridden() const { return mayBeOverridden(Linkage); } bool isWeakForLinker() const { return isWeakForLinker(Linkage); } diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h index 034ade1fb0..99b7a73b35 100644 --- a/include/llvm/GlobalVariable.h +++ b/include/llvm/GlobalVariable.h @@ -41,24 +41,35 @@ class GlobalVariable : public GlobalValue, public ilist_node<GlobalVariable> { void setParent(Module *parent); bool isConstantGlobal : 1; // Is this a global constant? - bool isThreadLocalSymbol : 1; // Is this symbol "Thread Local"? 
+ unsigned threadLocalMode : 3; // Is this symbol "Thread Local", + // if so, what is the desired model? public: // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } + + enum ThreadLocalMode { + NotThreadLocal = 0, + GeneralDynamicTLSModel, + LocalDynamicTLSModel, + InitialExecTLSModel, + LocalExecTLSModel + }; + /// GlobalVariable ctor - If a parent module is specified, the global is /// automatically inserted into the end of the specified modules global list. GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage, Constant *Initializer = 0, const Twine &Name = "", - bool ThreadLocal = false, unsigned AddressSpace = 0); + ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0); /// GlobalVariable ctor - This creates a global and inserts it before the /// specified other global. GlobalVariable(Module &M, Type *Ty, bool isConstant, LinkageTypes Linkage, Constant *Initializer, - const Twine &Name, - GlobalVariable *InsertBefore = 0, bool ThreadLocal = false, + const Twine &Name = "", + GlobalVariable *InsertBefore = 0, + ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0); ~GlobalVariable() { @@ -135,8 +146,14 @@ public: void setConstant(bool Val) { isConstantGlobal = Val; } /// If the value is "Thread Local", its value isn't shared by the threads. - bool isThreadLocal() const { return isThreadLocalSymbol; } - void setThreadLocal(bool Val) { isThreadLocalSymbol = Val; } + bool isThreadLocal() const { return threadLocalMode != NotThreadLocal; } + void setThreadLocal(bool Val) { + threadLocalMode = Val ? GeneralDynamicTLSModel : NotThreadLocal; + } + void setThreadLocalMode(ThreadLocalMode Val) { threadLocalMode = Val; } + ThreadLocalMode getThreadLocalMode() const { + return static_cast<ThreadLocalMode>(threadLocalMode); + } /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalVariable) from the GlobalVariable Src to this one. 
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/IRBuilder.h index ef00e8ec24..c6200273b1 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/IRBuilder.h @@ -1,4 +1,4 @@ -//===---- llvm/Support/IRBuilder.h - Builder for LLVM Instrs ----*- C++ -*-===// +//===---- llvm/IRBuilder.h - Builder for LLVM Instructions ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_IRBUILDER_H -#define LLVM_SUPPORT_IRBUILDER_H +#ifndef LLVM_IRBUILDER_H +#define LLVM_IRBUILDER_H #include "llvm/Instructions.h" #include "llvm/BasicBlock.h" diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 1b8bd79eca..c2cb7c218b 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -216,7 +216,6 @@ void initializeRegionOnlyPrinterPass(PassRegistry&); void initializeRegionOnlyViewerPass(PassRegistry&); void initializeRegionPrinterPass(PassRegistry&); void initializeRegionViewerPass(PassRegistry&); -void initializeRenderMachineFunctionPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); void initializeSROA_DTPass(PassRegistry&); void initializeSROA_SSAUpPass(PassRegistry&); diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h index a386d1de42..5512dcc9e6 100644 --- a/include/llvm/Instruction.h +++ b/include/llvm/Instruction.h @@ -281,6 +281,16 @@ public: /// ignores the SubclassOptionalData flags, which specify conditions /// under which the instruction's result is undefined. bool isIdenticalToWhenDefined(const Instruction *I) const; + + /// When checking for operation equivalence (using isSameOperationAs) it is + /// sometimes useful to ignore certain attributes. + enum OperationEquivalenceFlags { + /// Check for equivalence ignoring load/store alignment. 
+ CompareIgnoringAlignment = 1<<0, + /// Check for equivalence treating a type and a vector of that type + /// as equivalent. + CompareUsingScalarTypes = 1<<1 + }; /// This function determines if the specified instruction executes the same /// operation as the current one. This means that the opcodes, type, operand @@ -290,7 +300,7 @@ public: /// @returns true if the specified instruction is the same operation as /// the current one. /// @brief Determine if one instruction is the same operation as another. - bool isSameOperationAs(const Instruction *I) const; + bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const; /// isUsedOutsideOfBlock - Return true if there are any uses of this /// instruction in blocks other than the specified block. Note that PHI nodes diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index f5a48cd47e..f5187e6832 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -701,7 +701,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicRMWInst, Value) // checkGEPType - Simple wrapper function to give a better assertion failure // message on bad indexes for a gep instruction. // -static inline Type *checkGEPType(Type *Ty) { +inline Type *checkGEPType(Type *Ty) { assert(Ty && "Invalid GetElementPtrInst indices for type!"); return Ty; } @@ -1267,6 +1267,11 @@ public: /// removeAttribute - removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attributes attr); + /// \brief Return true if this call has the given attribute. + bool hasFnAttr(Attributes N) const { + return paramHasAttr(~0, N); + } + /// @brief Determine whether the call or the callee has the given attribute. bool paramHasAttr(unsigned i, Attributes attr) const; @@ -1276,7 +1281,7 @@ public: } /// @brief Return true if the call should not be inlined. 
- bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); } + bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline(bool Value = true) { if (Value) addAttribute(~0, Attribute::NoInline); else removeAttribute(~0, Attribute::NoInline); @@ -1284,7 +1289,7 @@ public: /// @brief Return true if the call can return twice bool canReturnTwice() const { - return paramHasAttr(~0, Attribute::ReturnsTwice); + return hasFnAttr(Attribute::ReturnsTwice); } void setCanReturnTwice(bool Value = true) { if (Value) addAttribute(~0, Attribute::ReturnsTwice); @@ -1293,7 +1298,7 @@ public: /// @brief Determine if the call does not access memory. bool doesNotAccessMemory() const { - return paramHasAttr(~0, Attribute::ReadNone); + return hasFnAttr(Attribute::ReadNone); } void setDoesNotAccessMemory(bool NotAccessMemory = true) { if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone); @@ -1302,7 +1307,7 @@ public: /// @brief Determine if the call does not access or only reads memory. bool onlyReadsMemory() const { - return doesNotAccessMemory() || paramHasAttr(~0, Attribute::ReadOnly); + return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } void setOnlyReadsMemory(bool OnlyReadsMemory = true) { if (OnlyReadsMemory) addAttribute(~0, Attribute::ReadOnly); @@ -1310,14 +1315,14 @@ public: } /// @brief Determine if the call cannot return. - bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); } + bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn(bool DoesNotReturn = true) { if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn); else removeAttribute(~0, Attribute::NoReturn); } /// @brief Determine if the call cannot unwind. 
- bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); } + bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); } void setDoesNotThrow(bool DoesNotThrow = true) { if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind); else removeAttribute(~0, Attribute::NoUnwind); @@ -2442,10 +2447,31 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) class SwitchInst : public TerminatorInst { void *operator new(size_t, unsigned); // DO NOT IMPLEMENT unsigned ReservedSpace; + // Operands format: // Operand[0] = Value to switch on // Operand[1] = Default basic block destination // Operand[2n ] = Value to match // Operand[2n+1] = BasicBlock to go to on match + + // Store case values separately from operands list. We needn't User-Use + // concept here, since it is just a case value, it will always constant, + // and case value couldn't reused with another instructions/values. + // Additionally: + // It allows us to use custom type for case values that is not inherited + // from Value. Since case value is a complex type that implements + // the subset of integers, we needn't extract sub-constants within + // slow getAggregateElement method. + // For case values we will use std::list to by two reasons: + // 1. It allows to add/remove cases without whole collection reallocation. + // 2. In most of cases we needn't random access. + // Currently case values are also stored in Operands List, but it will moved + // out in future commits. 
+ typedef std::list<IntegersSubset> Subsets; + typedef Subsets::iterator SubsetsIt; + typedef Subsets::const_iterator SubsetsConstIt; + + Subsets TheSubsets; + SwitchInst(const SwitchInst &SI); void init(Value *Value, BasicBlock *Default, unsigned NumReserved); void growOperands(); @@ -2470,12 +2496,20 @@ protected: virtual SwitchInst *clone_impl() const; public: - template <class SwitchInstTy, class ConstantIntTy, class BasicBlockTy> + // FIXME: Currently there are a lot of unclean template parameters, + // we need to make refactoring in future. + // All these parameters are used to implement both iterator and const_iterator + // without code duplication. + // SwitchInstTy may be "const SwitchInst" or "SwitchInst" + // ConstantIntTy may be "const ConstantInt" or "ConstantInt" + // SubsetsItTy may be SubsetsConstIt or SubsetsIt + // BasicBlockTy may be "const BasicBlock" or "BasicBlock" + template <class SwitchInstTy, class ConstantIntTy, + class SubsetsItTy, class BasicBlockTy> class CaseIteratorT; - typedef CaseIteratorT<const SwitchInst, const ConstantInt, const BasicBlock> - ConstCaseIt; - + typedef CaseIteratorT<const SwitchInst, const ConstantInt, + SubsetsConstIt, const BasicBlock> ConstCaseIt; class CaseIt; // -2 @@ -2516,23 +2550,23 @@ public: /// Returns a read/write iterator that points to the first /// case in SwitchInst. CaseIt case_begin() { - return CaseIt(this, 0); + return CaseIt(this, 0, TheSubsets.begin()); } /// Returns a read-only iterator that points to the first /// case in the SwitchInst. ConstCaseIt case_begin() const { - return ConstCaseIt(this, 0); + return ConstCaseIt(this, 0, TheSubsets.begin()); } /// Returns a read/write iterator that points one past the last /// in the SwitchInst. CaseIt case_end() { - return CaseIt(this, getNumCases()); + return CaseIt(this, getNumCases(), TheSubsets.end()); } /// Returns a read-only iterator that points one past the last /// in the SwitchInst. 
ConstCaseIt case_end() const { - return ConstCaseIt(this, getNumCases()); + return ConstCaseIt(this, getNumCases(), TheSubsets.end()); } /// Returns an iterator that points to the default case. /// Note: this iterator allows to resolve successor only. Attempt @@ -2540,10 +2574,10 @@ public: /// Also note, that increment and decrement also causes an assertion and /// makes iterator invalid. CaseIt case_default() { - return CaseIt(this, DefaultPseudoIndex); + return CaseIt(this, DefaultPseudoIndex, TheSubsets.end()); } ConstCaseIt case_default() const { - return ConstCaseIt(this, DefaultPseudoIndex); + return ConstCaseIt(this, DefaultPseudoIndex, TheSubsets.end()); } /// findCaseValue - Search all of the case values for the specified constant. @@ -2597,7 +2631,7 @@ public: /// Note: /// This action invalidates iterators for all cases following the one removed, /// including the case_end() iterator. - void removeCase(CaseIt i); + void removeCase(CaseIt& i); unsigned getNumSuccessors() const { return getNumOperands()/2; } BasicBlock *getSuccessor(unsigned idx) const { @@ -2622,24 +2656,38 @@ public: // Case iterators definition. - template <class SwitchInstTy, class ConstantIntTy, class BasicBlockTy> + template <class SwitchInstTy, class ConstantIntTy, + class SubsetsItTy, class BasicBlockTy> class CaseIteratorT { protected: SwitchInstTy *SI; - unsigned Index; - - public: - - typedef CaseIteratorT<SwitchInstTy, ConstantIntTy, BasicBlockTy> Self; + unsigned long Index; + SubsetsItTy SubsetIt; /// Initializes case iterator for given SwitchInst and for given /// case number. 
- CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) { + friend class SwitchInst; + CaseIteratorT(SwitchInstTy *SI, unsigned SuccessorIndex, + SubsetsItTy CaseValueIt) { this->SI = SI; - Index = CaseNum; + Index = SuccessorIndex; + this->SubsetIt = CaseValueIt; } + public: + typedef typename SubsetsItTy::reference IntegersSubsetRef; + typedef CaseIteratorT<SwitchInstTy, ConstantIntTy, + SubsetsItTy, BasicBlockTy> Self; + + CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) { + this->SI = SI; + Index = CaseNum; + SubsetIt = SI->TheSubsets.begin(); + std::advance(SubsetIt, CaseNum); + } + + /// Initializes case iterator for given SwitchInst and for given /// TerminatorInst's successor index. static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) { @@ -2654,19 +2702,17 @@ public: /// @Deprecated ConstantIntTy *getCaseValue() { assert(Index < SI->getNumCases() && "Index out the number of cases."); - IntegersSubset CaseRanges = - reinterpret_cast<Constant*>(SI->getOperand(2 + Index*2)); - IntegersSubset::Range R = CaseRanges.getItem(0); + IntegersSubsetRef CaseRanges = *SubsetIt; // FIXME: Currently we work with ConstantInt based cases. // So return CaseValue as ConstantInt. - return R.getLow().toConstantInt(); + return CaseRanges.getSingleNumber(0).toConstantInt(); } /// Resolves case value for current case. - IntegersSubset getCaseValueEx() { + IntegersSubsetRef getCaseValueEx() { assert(Index < SI->getNumCases() && "Index out the number of cases."); - return reinterpret_cast<Constant*>(SI->getOperand(2 + Index*2)); + return *SubsetIt; } /// Resolves successor for current case. @@ -2689,9 +2735,13 @@ public: Self operator++() { // Check index correctness after increment. - // Note: Index == getNumCases() means end(). + // Note: Index == getNumCases() means end(). 
assert(Index+1 <= SI->getNumCases() && "Index out the number of cases."); ++Index; + if (Index == 0) + SubsetIt = SI->TheSubsets.begin(); + else + ++SubsetIt; return *this; } Self operator++(int) { @@ -2703,9 +2753,18 @@ public: // Check index correctness after decrement. // Note: Index == getNumCases() means end(). // Also allow "-1" iterator here. That will became valid after ++. - assert((Index == 0 || Index-1 <= SI->getNumCases()) && + unsigned NumCases = SI->getNumCases(); + assert((Index == 0 || Index-1 <= NumCases) && "Index out the number of cases."); --Index; + if (Index == NumCases) { + SubsetIt = SI->TheSubsets.end(); + return *this; + } + + if (Index != -1UL) + --SubsetIt; + return *this; } Self operator--(int) { @@ -2723,14 +2782,25 @@ public: } }; - class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> { + class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt, + SubsetsIt, BasicBlock> { + typedef CaseIteratorT<SwitchInst, ConstantInt, SubsetsIt, BasicBlock> + ParentTy; + + protected: + friend class SwitchInst; + CaseIt(SwitchInst *SI, unsigned CaseNum, SubsetsIt SubsetIt) : + ParentTy(SI, CaseNum, SubsetIt) {} - typedef CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> ParentTy; + void updateCaseValueOperand(IntegersSubset& V) { + SI->setOperand(2 + Index*2, reinterpret_cast<Value*>((Constant*)V)); + } public: + + CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} CaseIt(const ParentTy& Src) : ParentTy(Src) {} - CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} /// Sets the new value for current case. /// @Deprecated. @@ -2740,14 +2810,15 @@ public: // FIXME: Currently we work with ConstantInt based cases. // So inititalize IntItem container directly from ConstantInt. 
Mapping.add(IntItem::fromConstantInt(V)); - SI->setOperand(2 + Index*2, - reinterpret_cast<Value*>((Constant*)Mapping.getCase())); + *SubsetIt = Mapping.getCase(); + updateCaseValueOperand(*SubsetIt); } /// Sets the new value for current case. void setValueEx(IntegersSubset& V) { assert(Index < SI->getNumCases() && "Index out the number of cases."); - SI->setOperand(2 + Index*2, reinterpret_cast<Value*>((Constant*)V)); + *SubsetIt = V; + updateCaseValueOperand(*SubsetIt); } /// Sets the new successor for current case. @@ -2958,6 +3029,11 @@ public: /// removeAttribute - removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attributes attr); + /// \brief Return true if this call has the given attribute. + bool hasFnAttr(Attributes N) const { + return paramHasAttr(~0, N); + } + /// @brief Determine whether the call or the callee has the given attribute. bool paramHasAttr(unsigned i, Attributes attr) const; @@ -2967,7 +3043,7 @@ public: } /// @brief Return true if the call should not be inlined. - bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); } + bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline(bool Value = true) { if (Value) addAttribute(~0, Attribute::NoInline); else removeAttribute(~0, Attribute::NoInline); @@ -2975,7 +3051,7 @@ public: /// @brief Determine if the call does not access memory. bool doesNotAccessMemory() const { - return paramHasAttr(~0, Attribute::ReadNone); + return hasFnAttr(Attribute::ReadNone); } void setDoesNotAccessMemory(bool NotAccessMemory = true) { if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone); @@ -2984,7 +3060,7 @@ public: /// @brief Determine if the call does not access or only reads memory. 
bool onlyReadsMemory() const { - return doesNotAccessMemory() || paramHasAttr(~0, Attribute::ReadOnly); + return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } void setOnlyReadsMemory(bool OnlyReadsMemory = true) { if (OnlyReadsMemory) addAttribute(~0, Attribute::ReadOnly); @@ -2992,14 +3068,14 @@ public: } /// @brief Determine if the call cannot return. - bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); } + bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn(bool DoesNotReturn = true) { if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn); else removeAttribute(~0, Attribute::NoReturn); } /// @brief Determine if the call cannot unwind. - bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); } + bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); } void setDoesNotThrow(bool DoesNotThrow = true) { if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind); else removeAttribute(~0, Attribute::NoUnwind); diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index c2e2065152..d3960ecb34 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -412,6 +412,9 @@ def int_trap : Intrinsic<[], [], [IntrNoReturn]>, def int_debugtrap : Intrinsic<[]>, GCCBuiltin<"__builtin_debugtrap">; +// NOP: calls/invokes to this intrinsic are removed by codegen +def int_donothing : Intrinsic<[], [], [IntrNoMem]>; + // Intrisics to support half precision floating point format let Properties = [IntrNoMem] in { def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>, @@ -485,3 +488,4 @@ include "llvm/IntrinsicsCellSPU.td" include "llvm/IntrinsicsXCore.td" include "llvm/IntrinsicsHexagon.td" include "llvm/IntrinsicsNVVM.td" +include "llvm/IntrinsicsMips.td" diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IntrinsicsHexagon.td index f4a905b890..efd04f309a 100644 --- a/include/llvm/IntrinsicsHexagon.td +++ 
b/include/llvm/IntrinsicsHexagon.td @@ -612,7 +612,7 @@ class Hexagon_df_dfdfdfqi_Intrinsic<string GCCIntSuffix> // BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1) // def int_hexagon_SI_to_SXTHI_asrh : -Hexagon_si_si_Intrinsic<"SI.to.SXTHI.asrh">; +Hexagon_si_si_Intrinsic<"SI_to_SXTHI_asrh">; // // BUILTIN_INFO_NONCONST(circ_ldd,PTR_ftype_PTRPTRSISI,4) // @@ -624,4254 +624,4254 @@ Hexagon_mem_memmemsisi_Intrinsic<"circ_ldd">; // BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2) // def int_hexagon_C2_cmpeq : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeq">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpeq">; // // BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgt : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgt">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgt">; // // BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgtu : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtu">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgtu">; // // BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2) // def int_hexagon_C2_cmpeqp : -Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpeqp">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_C2_cmpeqp">; // // BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2) // def int_hexagon_C2_cmpgtp : -Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtp">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_C2_cmpgtp">; // // BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2) // def int_hexagon_C2_cmpgtup : -Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtup">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_C2_cmpgtup">; // // BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2) // def int_hexagon_A4_rcmpeqi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeqi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeqi">; // // BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2) // def int_hexagon_A4_rcmpneqi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneqi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneqi">; // // BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2) // def int_hexagon_A4_rcmpeq : 
-Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeq">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpeq">; // // BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2) // def int_hexagon_A4_rcmpneq : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneq">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_rcmpneq">; // // BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2) // def int_hexagon_C2_bitsset : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsset">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_bitsset">; // // BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2) // def int_hexagon_C2_bitsclr : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclr">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_bitsclr">; // // BUILTIN_INFO(HEXAGON.C4_nbitsset,QI_ftype_SISI,2) // def int_hexagon_C4_nbitsset : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.nbitsset">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_nbitsset">; // // BUILTIN_INFO(HEXAGON.C4_nbitsclr,QI_ftype_SISI,2) // def int_hexagon_C4_nbitsclr : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.nbitsclr">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_nbitsclr">; // // BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2) // def int_hexagon_C2_cmpeqi : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeqi">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpeqi">; // // BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgti : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgti">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgti">; // // BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgtui : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtui">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgtui">; // // BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgei : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgei">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgei">; // // BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2) // def int_hexagon_C2_cmpgeui : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgeui">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpgeui">; // // 
BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2) // def int_hexagon_C2_cmplt : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmplt">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmplt">; // // BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2) // def int_hexagon_C2_cmpltu : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpltu">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_cmpltu">; // // BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2) // def int_hexagon_C2_bitsclri : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclri">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C2_bitsclri">; // // BUILTIN_INFO(HEXAGON.C4_nbitsclri,QI_ftype_SISI,2) // def int_hexagon_C4_nbitsclri : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.nbitsclri">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_nbitsclri">; // // BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2) // def int_hexagon_C4_cmpneqi : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneqi">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmpneqi">; // // BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2) // def int_hexagon_C4_cmpltei : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpltei">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmpltei">; // // BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2) // def int_hexagon_C4_cmplteui : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteui">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmplteui">; // // BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2) // def int_hexagon_C4_cmpneq : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneq">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmpneq">; // // BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2) // def int_hexagon_C4_cmplte : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplte">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmplte">; // // BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2) // def int_hexagon_C4_cmplteu : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteu">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_C4_cmplteu">; // // BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2) // def int_hexagon_C2_and : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.and">; 
+Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C2_and">; // // BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2) // def int_hexagon_C2_or : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.or">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C2_or">; // // BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2) // def int_hexagon_C2_xor : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.xor">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C2_xor">; // // BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2) // def int_hexagon_C2_andn : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.andn">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C2_andn">; // // BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1) // def int_hexagon_C2_not : -Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.not">; +Hexagon_qi_qi_Intrinsic<"HEXAGON_C2_not">; // // BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2) // def int_hexagon_C2_orn : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.orn">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C2_orn">; // // BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3) // def int_hexagon_C4_and_and : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and.and">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_and_and">; // // BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3) // def int_hexagon_C4_and_or : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and.or">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_and_or">; // // BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3) // def int_hexagon_C4_or_and : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or.and">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_or_and">; // // BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3) // def int_hexagon_C4_or_or : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or.or">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_or_or">; // // BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3) // def int_hexagon_C4_and_andn : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and.andn">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_and_andn">; // // BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3) // def int_hexagon_C4_and_orn : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and.orn">; 
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_and_orn">; // // BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3) // def int_hexagon_C4_or_andn : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or.andn">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_or_andn">; // // BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3) // def int_hexagon_C4_or_orn : -Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or.orn">; +Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON_C4_or_orn">; // // BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1) // def int_hexagon_C2_pxfer_map : -Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.pxfer.map">; +Hexagon_qi_qi_Intrinsic<"HEXAGON_C2_pxfer_map">; // // BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1) // def int_hexagon_C2_any8 : -Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.any8">; +Hexagon_qi_qi_Intrinsic<"HEXAGON_C2_any8">; // // BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1) // def int_hexagon_C2_all8 : -Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.all8">; +Hexagon_qi_qi_Intrinsic<"HEXAGON_C2_all8">; // // BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2) // def int_hexagon_C2_vitpack : -Hexagon_si_qiqi_Intrinsic<"HEXAGON.C2.vitpack">; +Hexagon_si_qiqi_Intrinsic<"HEXAGON_C2_vitpack">; // // BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3) // def int_hexagon_C2_mux : -Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.mux">; +Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_mux">; // // BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3) // def int_hexagon_C2_muxii : -Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxii">; +Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxii">; // // BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3) // def int_hexagon_C2_muxir : -Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxir">; +Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxir">; // // BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3) // def int_hexagon_C2_muxri : -Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxri">; +Hexagon_si_qisisi_Intrinsic<"HEXAGON_C2_muxri">; // // BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3) // def int_hexagon_C2_vmux : 
-Hexagon_di_qididi_Intrinsic<"HEXAGON.C2.vmux">; +Hexagon_di_qididi_Intrinsic<"HEXAGON_C2_vmux">; // // BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1) // def int_hexagon_C2_mask : -Hexagon_di_qi_Intrinsic<"HEXAGON.C2.mask">; +Hexagon_di_qi_Intrinsic<"HEXAGON_C2_mask">; // // BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpbeq : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbeq">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpbeq">; // // BUILTIN_INFO(HEXAGON.A4_vcmpbeqi,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpbeqi : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpbeqi">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpbeqi">; // // BUILTIN_INFO(HEXAGON.A4_vcmpbeq_any,QI_ftype_DIDI,2) // def int_hexagon_A4_vcmpbeq_any : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A4.vcmpbeq.any">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A4_vcmpbeq_any">; // // BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpbgtu : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbgtu">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpbgtu">; // // BUILTIN_INFO(HEXAGON.A4_vcmpbgtui,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpbgtui : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpbgtui">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpbgtui">; // // BUILTIN_INFO(HEXAGON.A4_vcmpbgt,QI_ftype_DIDI,2) // def int_hexagon_A4_vcmpbgt : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A4.vcmpbgt">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A4_vcmpbgt">; // // BUILTIN_INFO(HEXAGON.A4_vcmpbgti,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpbgti : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpbgti">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpbgti">; // // BUILTIN_INFO(HEXAGON.A4_cmpbeq,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbeq : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbeq">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbeq">; // // BUILTIN_INFO(HEXAGON.A4_cmpbeqi,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbeqi : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbeqi">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbeqi">; // // 
BUILTIN_INFO(HEXAGON.A4_cmpbgtu,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbgtu : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbgtu">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbgtu">; // // BUILTIN_INFO(HEXAGON.A4_cmpbgtui,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbgtui : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbgtui">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbgtui">; // // BUILTIN_INFO(HEXAGON.A4_cmpbgt,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbgt : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbgt">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbgt">; // // BUILTIN_INFO(HEXAGON.A4_cmpbgti,QI_ftype_SISI,2) // def int_hexagon_A4_cmpbgti : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpbgti">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpbgti">; // // BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpheq : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpheq">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpheq">; // // BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmphgt : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgt">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmphgt">; // // BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmphgtu : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgtu">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmphgtu">; // // BUILTIN_INFO(HEXAGON.A4_vcmpheqi,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpheqi : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpheqi">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpheqi">; // // BUILTIN_INFO(HEXAGON.A4_vcmphgti,QI_ftype_DISI,2) // def int_hexagon_A4_vcmphgti : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmphgti">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmphgti">; // // BUILTIN_INFO(HEXAGON.A4_vcmphgtui,QI_ftype_DISI,2) // def int_hexagon_A4_vcmphgtui : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmphgtui">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmphgtui">; // // BUILTIN_INFO(HEXAGON.A4_cmpheq,QI_ftype_SISI,2) // def int_hexagon_A4_cmpheq : 
-Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpheq">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpheq">; // // BUILTIN_INFO(HEXAGON.A4_cmphgt,QI_ftype_SISI,2) // def int_hexagon_A4_cmphgt : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmphgt">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmphgt">; // // BUILTIN_INFO(HEXAGON.A4_cmphgtu,QI_ftype_SISI,2) // def int_hexagon_A4_cmphgtu : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmphgtu">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmphgtu">; // // BUILTIN_INFO(HEXAGON.A4_cmpheqi,QI_ftype_SISI,2) // def int_hexagon_A4_cmpheqi : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmpheqi">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmpheqi">; // // BUILTIN_INFO(HEXAGON.A4_cmphgti,QI_ftype_SISI,2) // def int_hexagon_A4_cmphgti : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmphgti">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmphgti">; // // BUILTIN_INFO(HEXAGON.A4_cmphgtui,QI_ftype_SISI,2) // def int_hexagon_A4_cmphgtui : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.A4.cmphgtui">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_A4_cmphgtui">; // // BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpweq : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpweq">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpweq">; // // BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpwgt : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgt">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpwgt">; // // BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2) // def int_hexagon_A2_vcmpwgtu : -Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgtu">; +Hexagon_qi_didi_Intrinsic<"HEXAGON_A2_vcmpwgtu">; // // BUILTIN_INFO(HEXAGON.A4_vcmpweqi,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpweqi : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpweqi">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpweqi">; // // BUILTIN_INFO(HEXAGON.A4_vcmpwgti,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpwgti : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpwgti">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpwgti">; // // 
BUILTIN_INFO(HEXAGON.A4_vcmpwgtui,QI_ftype_DISI,2) // def int_hexagon_A4_vcmpwgtui : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.vcmpwgtui">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_vcmpwgtui">; // // BUILTIN_INFO(HEXAGON.A4_boundscheck,QI_ftype_SIDI,2) // def int_hexagon_A4_boundscheck : -Hexagon_qi_sidi_Intrinsic<"HEXAGON.A4.boundscheck">; +Hexagon_qi_sidi_Intrinsic<"HEXAGON_A4_boundscheck">; // // BUILTIN_INFO(HEXAGON.A4_tlbmatch,QI_ftype_DISI,2) // def int_hexagon_A4_tlbmatch : -Hexagon_qi_disi_Intrinsic<"HEXAGON.A4.tlbmatch">; +Hexagon_qi_disi_Intrinsic<"HEXAGON_A4_tlbmatch">; // // BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1) // def int_hexagon_C2_tfrpr : -Hexagon_si_qi_Intrinsic<"HEXAGON.C2.tfrpr">; +Hexagon_si_qi_Intrinsic<"HEXAGON_C2_tfrpr">; // // BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1) // def int_hexagon_C2_tfrrp : -Hexagon_qi_si_Intrinsic<"HEXAGON.C2.tfrrp">; +Hexagon_qi_si_Intrinsic<"HEXAGON_C2_tfrrp">; // // BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2) // def int_hexagon_C4_fastcorner9 : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9">; // // BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2) // def int_hexagon_C4_fastcorner9_not : -Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9.not">; +Hexagon_qi_qiqi_Intrinsic<"HEXAGON_C4_fastcorner9_not">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s0">; 
+Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_lh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s1">; 
+Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_lh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_lh_s0 : 
-Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_acc_sat_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_acc_sat_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_lh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s0">; // // 
BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpy_nac_sat_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpy_nac_sat_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_hh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_hh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_hl_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_hl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_lh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_lh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_ll_s0 : 
-Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_ll_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_hh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_hh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_hl_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_hl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_lh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_lh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_ll_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_ll_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_ll_s1">; // // 
BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_hh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_hh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_hl_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_hl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_lh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_lh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_ll_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_rnd_ll_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_rnd_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_hh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2) // def 
int_hexagon_M2_mpy_sat_rnd_hh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_hl_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_hl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_lh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_lh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_ll_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_sat_rnd_ll_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_sat_rnd_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_hh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_hh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hh_s1">; // // 
BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_hl_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_hl_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_lh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_lh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_ll_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_acc_ll_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_acc_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_hh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_hh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_hl_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s0">; // // 
BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_hl_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_lh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_lh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_ll_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyd_nac_ll_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyd_nac_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_hh_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_hh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_hl_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_hl_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_lh_s0 : 
-Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_lh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_ll_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_ll_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_hh_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_hh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_hl_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_hl_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_lh_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_lh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_lh_s1">; // // 
BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_ll_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2) // def int_hexagon_M2_mpyd_rnd_ll_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_lh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s0">; // // 
BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_acc_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_acc_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_hh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_hh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_hl_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_hl_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_lh_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_lh_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_ll_s0 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s0">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3) // def int_hexagon_M2_mpyu_nac_ll_s1 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s1">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_mpyu_nac_ll_s1">; // // 
BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_hh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_hh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_hl_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_hl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_lh_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_lh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_ll_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_ll_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_hh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_hh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s1">; 
+Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_hl_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_hl_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_lh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_lh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_ll_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_acc_ll_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_acc_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_hh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_hh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_hl_s0 : 
-Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_hl_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_lh_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_lh_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_ll_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_mpyud_nac_ll_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_hh_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_hh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_hl_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_hl_s1 : 
-Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_hl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_lh_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_lh_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_lh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_ll_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2) // def int_hexagon_M2_mpyud_ll_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_mpyud_ll_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2) // def int_hexagon_M2_mpysmi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpysmi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysmi">; // // BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3) // def int_hexagon_M2_macsip : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsip">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsip">; // // BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3) // def int_hexagon_M2_macsin : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsin">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_macsin">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2) // def int_hexagon_M2_dpmpyss_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_s0">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_dpmpyss_acc_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.acc.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_acc_s0">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3) // def 
int_hexagon_M2_dpmpyss_nac_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.nac.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyss_nac_s0">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2) // def int_hexagon_M2_dpmpyuu_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.dpmpyuu.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_dpmpyuu_acc_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.acc.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_dpmpyuu_nac_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.nac.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">; // // BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_up : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up">; // // BUILTIN_INFO(HEXAGON.M2_mpy_up_s1,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_up_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1">; // // BUILTIN_INFO(HEXAGON.M2_mpy_up_s1_sat,SI_ftype_SISI,2) // def int_hexagon_M2_mpy_up_s1_sat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up.s1.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpy_up_s1_sat">; // // BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2) // def int_hexagon_M2_mpyu_up : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyu.up">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyu_up">; // // BUILTIN_INFO(HEXAGON.M2_mpysu_up,SI_ftype_SISI,2) // def int_hexagon_M2_mpysu_up : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpysu.up">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpysu_up">; // // BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2) // def int_hexagon_M2_dpmpyss_rnd_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.rnd.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">; // // 
BUILTIN_INFO(HEXAGON.M4_mac_up_s1_sat,SI_ftype_SISISI,3) // def int_hexagon_M4_mac_up_s1_sat : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mac.up.s1.sat">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mac_up_s1_sat">; // // BUILTIN_INFO(HEXAGON.M4_nac_up_s1_sat,SI_ftype_SISISI,3) // def int_hexagon_M4_nac_up_s1_sat : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.nac.up.s1.sat">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_nac_up_s1_sat">; // // BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2) // def int_hexagon_M2_mpyi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyi">; // // BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2) // def int_hexagon_M2_mpyui : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyui">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_mpyui">; // // BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3) // def int_hexagon_M2_maci : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.maci">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_maci">; // // BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3) // def int_hexagon_M2_acci : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.acci">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_acci">; // // BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3) // def int_hexagon_M2_accii : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.accii">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_accii">; // // BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3) // def int_hexagon_M2_nacci : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.nacci">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_nacci">; // // BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3) // def int_hexagon_M2_naccii : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.naccii">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_naccii">; // // BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3) // def int_hexagon_M2_subacc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.subacc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_subacc">; // // BUILTIN_INFO(HEXAGON.M4_mpyrr_addr,SI_ftype_SISISI,3) // def int_hexagon_M4_mpyrr_addr : 
-Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mpyrr.addr">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addr">; // // BUILTIN_INFO(HEXAGON.M4_mpyri_addr_u2,SI_ftype_SISISI,3) // def int_hexagon_M4_mpyri_addr_u2 : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mpyri.addr.u2">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr_u2">; // // BUILTIN_INFO(HEXAGON.M4_mpyri_addr,SI_ftype_SISISI,3) // def int_hexagon_M4_mpyri_addr : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mpyri.addr">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addr">; // // BUILTIN_INFO(HEXAGON.M4_mpyri_addi,SI_ftype_SISISI,3) // def int_hexagon_M4_mpyri_addi : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mpyri.addi">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyri_addi">; // // BUILTIN_INFO(HEXAGON.M4_mpyrr_addi,SI_ftype_SISISI,3) // def int_hexagon_M4_mpyrr_addi : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.mpyrr.addi">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_mpyrr_addi">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2) // def int_hexagon_M2_vmpy2s_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2) // def int_hexagon_M2_vmpy2s_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1">; // // BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_vmac2s_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_vmac2s_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2s_s1">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2su_s0,DI_ftype_SISI,2) // def int_hexagon_M2_vmpy2su_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2su.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2su_s1,DI_ftype_SISI,2) // def 
int_hexagon_M2_vmpy2su_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2su.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_vmpy2su_s1">; // // BUILTIN_INFO(HEXAGON.M2_vmac2su_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_vmac2su_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2su.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmac2su_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_vmac2su_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2su.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2su_s1">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2) // def int_hexagon_M2_vmpy2s_s0pack : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0pack">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s0pack">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2) // def int_hexagon_M2_vmpy2s_s1pack : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1pack">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_vmpy2s_s1pack">; // // BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3) // def int_hexagon_M2_vmac2 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_vmac2">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_vmpy2es_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_vmpy2es_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vmpy2es_s1">; // // BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vmac2es_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s0">; // // BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vmac2es_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es_s1">; // // 
BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vmac2es : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vmac2es">; // // BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vrmac_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrmac.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrmac_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_vrmpy_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrmpy.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrmpy_s0">; // // BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2) // def int_hexagon_M2_vdmpyrs_s0 : -Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s0">; +Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s0">; // // BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2) // def int_hexagon_M2_vdmpyrs_s1 : -Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s1">; +Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vdmpyrs_s1">; // // BUILTIN_INFO(HEXAGON.M5_vrmpybuu,DI_ftype_DIDI,2) // def int_hexagon_M5_vrmpybuu : -Hexagon_di_didi_Intrinsic<"HEXAGON.M5.vrmpybuu">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybuu">; // // BUILTIN_INFO(HEXAGON.M5_vrmacbuu,DI_ftype_DIDIDI,3) // def int_hexagon_M5_vrmacbuu : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M5.vrmacbuu">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbuu">; // // BUILTIN_INFO(HEXAGON.M5_vrmpybsu,DI_ftype_DIDI,2) // def int_hexagon_M5_vrmpybsu : -Hexagon_di_didi_Intrinsic<"HEXAGON.M5.vrmpybsu">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vrmpybsu">; // // BUILTIN_INFO(HEXAGON.M5_vrmacbsu,DI_ftype_DIDIDI,3) // def int_hexagon_M5_vrmacbsu : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M5.vrmacbsu">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vrmacbsu">; // // BUILTIN_INFO(HEXAGON.M5_vmpybuu,DI_ftype_SISI,2) // def int_hexagon_M5_vmpybuu : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M5.vmpybuu">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybuu">; // // BUILTIN_INFO(HEXAGON.M5_vmpybsu,DI_ftype_SISI,2) // def 
int_hexagon_M5_vmpybsu : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M5.vmpybsu">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M5_vmpybsu">; // // BUILTIN_INFO(HEXAGON.M5_vmacbuu,DI_ftype_DISISI,3) // def int_hexagon_M5_vmacbuu : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M5.vmacbuu">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbuu">; // // BUILTIN_INFO(HEXAGON.M5_vmacbsu,DI_ftype_DISISI,3) // def int_hexagon_M5_vmacbsu : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M5.vmacbsu">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M5_vmacbsu">; // // BUILTIN_INFO(HEXAGON.M5_vdmpybsu,DI_ftype_DIDI,2) // def int_hexagon_M5_vdmpybsu : -Hexagon_di_didi_Intrinsic<"HEXAGON.M5.vdmpybsu">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M5_vdmpybsu">; // // BUILTIN_INFO(HEXAGON.M5_vdmacbsu,DI_ftype_DIDIDI,3) // def int_hexagon_M5_vdmacbsu : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M5.vdmacbsu">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M5_vdmacbsu">; // // BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vdmacs_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s0">; // // BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vdmacs_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vdmacs_s1">; // // BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_vdmpys_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s0">; // // BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_vdmpys_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vdmpys_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2) // def int_hexagon_M2_cmpyrs_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2) // def int_hexagon_M2_cmpyrs_s1 : 
-Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrs_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2) // def int_hexagon_M2_cmpyrsc_s0 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s0">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2) // def int_hexagon_M2_cmpyrsc_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_cmpyrsc_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cmacs_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_cmacs_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacs_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cmacsc_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_cmacsc_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacsc_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2) // def int_hexagon_M2_cmpys_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2) // def int_hexagon_M2_cmpys_s1 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpys_s1">; // // BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2) // def int_hexagon_M2_cmpysc_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2) // def int_hexagon_M2_cmpysc_s1 : 
-Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s1">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpysc_s1">; // // BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cnacs_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s0">; // // BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_cnacs_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacs_s1">; // // BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cnacsc_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s0">; // // BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3) // def int_hexagon_M2_cnacsc_s1 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s1">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cnacsc_s1">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2) // def int_hexagon_M2_vrcmpys_s1 : -Hexagon_di_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1">; +Hexagon_di_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3) // def int_hexagon_M2_vrcmpys_acc_s1 : -Hexagon_di_didisi_Intrinsic<"HEXAGON.M2.vrcmpys.acc.s1">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_M2_vrcmpys_acc_s1">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2) // def int_hexagon_M2_vrcmpys_s1rp : -Hexagon_si_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1rp">; +Hexagon_si_disi_Intrinsic<"HEXAGON_M2_vrcmpys_s1rp">; // // BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacls_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacls_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3) // def 
int_hexagon_M2_mmachs_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmachs_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyl_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyl_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyh_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyh_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacls_rs0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacls_rs1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacls_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmachs_rs0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmachs_rs1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmachs_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2) // def 
int_hexagon_M2_mmpyl_rs0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyl_rs1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyl_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyh_rs0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyh_rs1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyh_rs1">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s0,DI_ftype_DIDI,2) // def int_hexagon_M4_vrmpyeh_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M4.vrmpyeh.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s0">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyeh_s1,DI_ftype_DIDI,2) // def int_hexagon_M4_vrmpyeh_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M4.vrmpyeh.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyeh_s1">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M4_vrmpyeh_acc_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.vrmpyeh.acc.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s0">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyeh_acc_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M4_vrmpyeh_acc_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.vrmpyeh.acc.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyeh_acc_s1">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s0,DI_ftype_DIDI,2) // def int_hexagon_M4_vrmpyoh_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M4.vrmpyoh.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s0">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyoh_s1,DI_ftype_DIDI,2) // def int_hexagon_M4_vrmpyoh_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M4.vrmpyoh.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M4_vrmpyoh_s1">; // // 
BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M4_vrmpyoh_acc_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.vrmpyoh.acc.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s0">; // // BUILTIN_INFO(HEXAGON.M4_vrmpyoh_acc_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M4_vrmpyoh_acc_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.vrmpyoh.acc.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_vrmpyoh_acc_s1">; // // BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2) // def int_hexagon_M2_hmmpyl_rs1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyl.rs1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_rs1">; // // BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2) // def int_hexagon_M2_hmmpyh_rs1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyh.rs1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">; // // BUILTIN_INFO(HEXAGON.M2_hmmpyl_s1,SI_ftype_SISI,2) // def int_hexagon_M2_hmmpyl_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyl.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyl_s1">; // // BUILTIN_INFO(HEXAGON.M2_hmmpyh_s1,SI_ftype_SISI,2) // def int_hexagon_M2_hmmpyh_s1 : -Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyh.s1">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_M2_hmmpyh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmaculs_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmaculs_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacuhs_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacuhs_s1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s1">; 
+Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyul_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyul_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyuh_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyuh_s1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_s1">; // // BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmaculs_rs0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmaculs_rs1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmaculs_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacuhs_rs0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3) // def int_hexagon_M2_mmacuhs_rs1 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs1">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_mmacuhs_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyul_rs0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyul_rs1 : 
-Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyul_rs1">; // // BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyuh_rs0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs0">; // // BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2) // def int_hexagon_M2_mmpyuh_rs1 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs1">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_mmpyuh_rs1">; // // BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vrcmaci_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vrcmacr_s0 : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vrcmaci_s0c : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0c">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmaci_s0c">; // // BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vrcmacr_s0c : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0c">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vrcmacr_s0c">; // // BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cmaci_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmaci.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmaci_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3) // def int_hexagon_M2_cmacr_s0 : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacr.s0">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M2_cmacr_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2) // def int_hexagon_M2_vrcmpyi_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2) // def 
int_hexagon_M2_vrcmpyr_s0 : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2) // def int_hexagon_M2_vrcmpyi_s0c : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0c">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyi_s0c">; // // BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2) // def int_hexagon_M2_vrcmpyr_s0c : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0c">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vrcmpyr_s0c">; // // BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2) // def int_hexagon_M2_cmpyi_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyi.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyi_s0">; // // BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2) // def int_hexagon_M2_cmpyr_s0 : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyr.s0">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M2_cmpyr_s0">; // // BUILTIN_INFO(HEXAGON.M4_cmpyi_wh,SI_ftype_DISI,2) // def int_hexagon_M4_cmpyi_wh : -Hexagon_si_disi_Intrinsic<"HEXAGON.M4.cmpyi.wh">; +Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_wh">; // // BUILTIN_INFO(HEXAGON.M4_cmpyr_wh,SI_ftype_DISI,2) // def int_hexagon_M4_cmpyr_wh : -Hexagon_si_disi_Intrinsic<"HEXAGON.M4.cmpyr.wh">; +Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_wh">; // // BUILTIN_INFO(HEXAGON.M4_cmpyi_whc,SI_ftype_DISI,2) // def int_hexagon_M4_cmpyi_whc : -Hexagon_si_disi_Intrinsic<"HEXAGON.M4.cmpyi.whc">; +Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyi_whc">; // // BUILTIN_INFO(HEXAGON.M4_cmpyr_whc,SI_ftype_DISI,2) // def int_hexagon_M4_cmpyr_whc : -Hexagon_si_disi_Intrinsic<"HEXAGON.M4.cmpyr.whc">; +Hexagon_si_disi_Intrinsic<"HEXAGON_M4_cmpyr_whc">; // // BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2) // def int_hexagon_M2_vcmpy_s0_sat_i : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.i">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_i">; // // BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2) // def 
int_hexagon_M2_vcmpy_s0_sat_r : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.r">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s0_sat_r">; // // BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2) // def int_hexagon_M2_vcmpy_s1_sat_i : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.i">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_i">; // // BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2) // def int_hexagon_M2_vcmpy_s1_sat_r : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.r">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vcmpy_s1_sat_r">; // // BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vcmac_s0_sat_i : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.i">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_i">; // // BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3) // def int_hexagon_M2_vcmac_s0_sat_r : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.r">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M2_vcmac_s0_sat_r">; // // BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2) // def int_hexagon_S2_vcrotate : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.vcrotate">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcrotate">; // // BUILTIN_INFO(HEXAGON.S4_vrcrotate_acc,DI_ftype_DIDISISI,4) // def int_hexagon_S4_vrcrotate_acc : -Hexagon_di_didisisi_Intrinsic<"HEXAGON.S4.vrcrotate.acc">; +Hexagon_di_didisisi_Intrinsic<"HEXAGON_S4_vrcrotate_acc">; // // BUILTIN_INFO(HEXAGON.S4_vrcrotate,DI_ftype_DISISI,3) // def int_hexagon_S4_vrcrotate : -Hexagon_di_disisi_Intrinsic<"HEXAGON.S4.vrcrotate">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_vrcrotate">; // // BUILTIN_INFO(HEXAGON.S2_vcnegh,DI_ftype_DISI,2) // def int_hexagon_S2_vcnegh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.vcnegh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_vcnegh">; // // BUILTIN_INFO(HEXAGON.S2_vrcnegh,DI_ftype_DIDISI,3) // def int_hexagon_S2_vrcnegh : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.vrcnegh">; 
+Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vrcnegh">; // // BUILTIN_INFO(HEXAGON.M4_pmpyw,DI_ftype_SISI,2) // def int_hexagon_M4_pmpyw : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M4.pmpyw">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_pmpyw">; // // BUILTIN_INFO(HEXAGON.M4_vpmpyh,DI_ftype_SISI,2) // def int_hexagon_M4_vpmpyh : -Hexagon_di_sisi_Intrinsic<"HEXAGON.M4.vpmpyh">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_M4_vpmpyh">; // // BUILTIN_INFO(HEXAGON.M4_pmpyw_acc,DI_ftype_DISISI,3) // def int_hexagon_M4_pmpyw_acc : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M4.pmpyw.acc">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_pmpyw_acc">; // // BUILTIN_INFO(HEXAGON.M4_vpmpyh_acc,DI_ftype_DISISI,3) // def int_hexagon_M4_vpmpyh_acc : -Hexagon_di_disisi_Intrinsic<"HEXAGON.M4.vpmpyh.acc">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_M4_vpmpyh_acc">; // // BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2) // def int_hexagon_A2_add : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.add">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_add">; // // BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2) // def int_hexagon_A2_sub : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.sub">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_sub">; // // BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2) // def int_hexagon_A2_addsat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addsat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addsat">; // // BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2) // def int_hexagon_A2_subsat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subsat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subsat">; // // BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2) // def int_hexagon_A2_addi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addi">; // // BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2) // def int_hexagon_A2_addh_l16_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_ll">; // // BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2) // def int_hexagon_A2_addh_l16_hl 
: -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_hl">; // // BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2) // def int_hexagon_A2_addh_l16_sat_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">; // // BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2) // def int_hexagon_A2_addh_l16_sat_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_l16_sat_hl">; // // BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2) // def int_hexagon_A2_subh_l16_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_ll">; // // BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2) // def int_hexagon_A2_subh_l16_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_hl">; // // BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2) // def int_hexagon_A2_subh_l16_sat_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">; // // BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2) // def int_hexagon_A2_subh_l16_sat_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_l16_sat_hl">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_ll">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_lh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.lh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_lh">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hl">; 
+Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hl">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_hh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_hh">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_sat_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_ll">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_sat_lh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.lh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_lh">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_sat_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hl">; // // BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2) // def int_hexagon_A2_addh_h16_sat_hh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_addh_h16_sat_hh">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_ll">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_lh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.lh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_lh">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hl">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_hh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_hh">; // // 
BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_sat_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_ll">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_sat_lh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.lh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_lh">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_sat_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hl">; // // BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2) // def int_hexagon_A2_subh_h16_sat_hh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subh_h16_sat_hh">; // // BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1) // def int_hexagon_A2_aslh : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.aslh">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_aslh">; // // BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1) // def int_hexagon_A2_asrh : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.asrh">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_asrh">; // // BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2) // def int_hexagon_A2_addp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addp">; // // BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2) // def int_hexagon_A2_addpsat : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addpsat">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_addpsat">; // // BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2) // def int_hexagon_A2_addsp : -Hexagon_di_sidi_Intrinsic<"HEXAGON.A2.addsp">; +Hexagon_di_sidi_Intrinsic<"HEXAGON_A2_addsp">; // // BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2) // def int_hexagon_A2_subp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.subp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_subp">; // // BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1) // def int_hexagon_A2_neg 
: -Hexagon_si_si_Intrinsic<"HEXAGON.A2.neg">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_neg">; // // BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1) // def int_hexagon_A2_negsat : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.negsat">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_negsat">; // // BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1) // def int_hexagon_A2_abs : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.abs">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_abs">; // // BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1) // def int_hexagon_A2_abssat : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.abssat">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_abssat">; // // BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1) // def int_hexagon_A2_vconj : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.vconj">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_vconj">; // // BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1) // def int_hexagon_A2_negp : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.negp">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_negp">; // // BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1) // def int_hexagon_A2_absp : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.absp">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_absp">; // // BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2) // def int_hexagon_A2_max : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.max">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_max">; // // BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2) // def int_hexagon_A2_maxu : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.maxu">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_maxu">; // // BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2) // def int_hexagon_A2_min : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.min">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_min">; // // BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2) // def int_hexagon_A2_minu : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.minu">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_minu">; // // BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2) // def int_hexagon_A2_maxp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.maxp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxp">; // // 
BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2) // def int_hexagon_A2_maxup : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.maxup">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_maxup">; // // BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2) // def int_hexagon_A2_minp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.minp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minp">; // // BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2) // def int_hexagon_A2_minup : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.minup">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_minup">; // // BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1) // def int_hexagon_A2_tfr : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfr">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfr">; // // BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1) // def int_hexagon_A2_tfrsi : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfrsi">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_tfrsi">; // // BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1) // def int_hexagon_A2_tfrp : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.tfrp">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_tfrp">; // // BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1) // def int_hexagon_A2_tfrpi : -Hexagon_di_si_Intrinsic<"HEXAGON.A2.tfrpi">; +Hexagon_di_si_Intrinsic<"HEXAGON_A2_tfrpi">; // // BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1) // def int_hexagon_A2_zxtb : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxtb">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxtb">; // // BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1) // def int_hexagon_A2_sxtb : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxtb">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxtb">; // // BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1) // def int_hexagon_A2_zxth : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxth">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_zxth">; // // BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1) // def int_hexagon_A2_sxth : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxth">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_sxth">; // // BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2) // def int_hexagon_A2_combinew : 
-Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combinew">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combinew">; // // BUILTIN_INFO(HEXAGON.A4_combineri,DI_ftype_SISI,2) // def int_hexagon_A4_combineri : -Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineri">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineri">; // // BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_SISI,2) // def int_hexagon_A4_combineir : -Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineir">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_combineir">; // // BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2) // def int_hexagon_A2_combineii : -Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combineii">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A2_combineii">; // // BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2) // def int_hexagon_A2_combine_hh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hh">; // // BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2) // def int_hexagon_A2_combine_hl : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hl">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_hl">; // // BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2) // def int_hexagon_A2_combine_lh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.lh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_lh">; // // BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2) // def int_hexagon_A2_combine_ll : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.ll">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_combine_ll">; // // BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2) // def int_hexagon_A2_tfril : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfril">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfril">; // // BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2) // def int_hexagon_A2_tfrih : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfrih">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_tfrih">; // // BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2) // def int_hexagon_A2_and : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.and">; 
+Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_and">; // // BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2) // def int_hexagon_A2_or : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.or">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_or">; // // BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2) // def int_hexagon_A2_xor : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.xor">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_xor">; // // BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1) // def int_hexagon_A2_not : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.not">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_not">; // // BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3) // def int_hexagon_M2_xor_xacc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.xor.xacc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M2_xor_xacc">; // // BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3) // def int_hexagon_M4_xor_xacc : -Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.xor.xacc">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_M4_xor_xacc">; // // BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2) // def int_hexagon_A4_andn : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.andn">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_andn">; // // BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2) // def int_hexagon_A4_orn : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.orn">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_orn">; // // BUILTIN_INFO(HEXAGON.A4_andnp,DI_ftype_DIDI,2) // def int_hexagon_A4_andnp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A4.andnp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A4_andnp">; // // BUILTIN_INFO(HEXAGON.A4_ornp,DI_ftype_DIDI,2) // def int_hexagon_A4_ornp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A4.ornp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A4_ornp">; // // BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3) // def int_hexagon_S4_addaddi : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addaddi">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addaddi">; // // BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3) // def int_hexagon_S4_subaddi : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subaddi">; 
+Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subaddi">; // // BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3) // def int_hexagon_M4_and_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_and">; // // BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3) // def int_hexagon_M4_and_andn : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and.andn">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_andn">; // // BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3) // def int_hexagon_M4_and_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_or">; // // BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3) // def int_hexagon_M4_and_xor : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and.xor">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_and_xor">; // // BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3) // def int_hexagon_M4_or_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_and">; // // BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3) // def int_hexagon_M4_or_andn : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or.andn">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_andn">; // // BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3) // def int_hexagon_M4_or_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_or">; // // BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3) // def int_hexagon_M4_or_xor : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or.xor">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_or_xor">; // // BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3) // def int_hexagon_S4_or_andix : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or.andix">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andix">; // // BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3) // def int_hexagon_S4_or_andi : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or.andi">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_andi">; // // 
BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3) // def int_hexagon_S4_or_ori : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or.ori">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_or_ori">; // // BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3) // def int_hexagon_M4_xor_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_and">; // // BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3) // def int_hexagon_M4_xor_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_or">; // // BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3) // def int_hexagon_M4_xor_andn : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor.andn">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_M4_xor_andn">; // // BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2) // def int_hexagon_A2_subri : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subri">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_subri">; // // BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2) // def int_hexagon_A2_andir : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.andir">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_andir">; // // BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2) // def int_hexagon_A2_orir : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.orir">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_orir">; // // BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2) // def int_hexagon_A2_andp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.andp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_andp">; // // BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2) // def int_hexagon_A2_orp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.orp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_orp">; // // BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2) // def int_hexagon_A2_xorp : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.xorp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_xorp">; // // BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1) // def int_hexagon_A2_notp : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.notp">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_notp">; // // 
BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1) // def int_hexagon_A2_sxtw : -Hexagon_di_si_Intrinsic<"HEXAGON.A2.sxtw">; +Hexagon_di_si_Intrinsic<"HEXAGON_A2_sxtw">; // // BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1) // def int_hexagon_A2_sat : -Hexagon_si_di_Intrinsic<"HEXAGON.A2.sat">; +Hexagon_si_di_Intrinsic<"HEXAGON_A2_sat">; // // BUILTIN_INFO(HEXAGON.A2_roundsat,SI_ftype_DI,1) // def int_hexagon_A2_roundsat : -Hexagon_si_di_Intrinsic<"HEXAGON.A2.roundsat">; +Hexagon_si_di_Intrinsic<"HEXAGON_A2_roundsat">; // // BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1) // def int_hexagon_A2_sath : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.sath">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_sath">; // // BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1) // def int_hexagon_A2_satuh : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.satuh">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_satuh">; // // BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1) // def int_hexagon_A2_satub : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.satub">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_satub">; // // BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1) // def int_hexagon_A2_satb : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.satb">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_satb">; // // BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddub">; // // BUILTIN_INFO(HEXAGON.A2_vaddb_map,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddb_map : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddb.map">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddb_map">; // // BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddubs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddubs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddubs">; // // BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddh">; // // BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2) // 
def int_hexagon_A2_vaddhs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddhs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddhs">; // // BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2) // def int_hexagon_A2_vadduhs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vadduhs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vadduhs">; // // BUILTIN_INFO(HEXAGON.A5_vaddhubs,SI_ftype_DIDI,2) // def int_hexagon_A5_vaddhubs : -Hexagon_si_didi_Intrinsic<"HEXAGON.A5.vaddhubs">; +Hexagon_si_didi_Intrinsic<"HEXAGON_A5_vaddhubs">; // // BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddw">; // // BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2) // def int_hexagon_A2_vaddws : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddws">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vaddws">; // // BUILTIN_INFO(HEXAGON.S4_vxaddsubw,DI_ftype_DIDI,2) // def int_hexagon_S4_vxaddsubw : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxaddsubw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubw">; // // BUILTIN_INFO(HEXAGON.S4_vxsubaddw,DI_ftype_DIDI,2) // def int_hexagon_S4_vxsubaddw : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxsubaddw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddw">; // // BUILTIN_INFO(HEXAGON.S4_vxaddsubh,DI_ftype_DIDI,2) // def int_hexagon_S4_vxaddsubh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxaddsubh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubh">; // // BUILTIN_INFO(HEXAGON.S4_vxsubaddh,DI_ftype_DIDI,2) // def int_hexagon_S4_vxsubaddh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxsubaddh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddh">; // // BUILTIN_INFO(HEXAGON.S4_vxaddsubhr,DI_ftype_DIDI,2) // def int_hexagon_S4_vxaddsubhr : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxaddsubhr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxaddsubhr">; // // BUILTIN_INFO(HEXAGON.S4_vxsubaddhr,DI_ftype_DIDI,2) // def int_hexagon_S4_vxsubaddhr : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.vxsubaddhr">; 
+Hexagon_di_didi_Intrinsic<"HEXAGON_S4_vxsubaddhr">; // // BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2) // def int_hexagon_A2_svavgh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavgh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavgh">; // // BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2) // def int_hexagon_A2_svavghs : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavghs">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svavghs">; // // BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2) // def int_hexagon_A2_svnavgh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svnavgh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svnavgh">; // // BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2) // def int_hexagon_A2_svaddh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddh">; // // BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2) // def int_hexagon_A2_svaddhs : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddhs">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svaddhs">; // // BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2) // def int_hexagon_A2_svadduhs : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svadduhs">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svadduhs">; // // BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2) // def int_hexagon_A2_svsubh : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubh">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubh">; // // BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2) // def int_hexagon_A2_svsubhs : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubhs">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubhs">; // // BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2) // def int_hexagon_A2_svsubuhs : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubuhs">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A2_svsubuhs">; // // BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2) // def int_hexagon_A2_vraddub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vraddub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vraddub">; // // BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3) // def 
int_hexagon_A2_vraddub_acc : -Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vraddub.acc">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vraddub_acc">; // // BUILTIN_INFO(HEXAGON.M2_vraddh,SI_ftype_DIDI,2) // def int_hexagon_M2_vraddh : -Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vraddh">; +Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vraddh">; // // BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2) // def int_hexagon_M2_vradduh : -Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vradduh">; +Hexagon_si_didi_Intrinsic<"HEXAGON_M2_vradduh">; // // BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubub">; // // BUILTIN_INFO(HEXAGON.A2_vsubb_map,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubb_map : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubb.map">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubb_map">; // // BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2) // def int_hexagon_A2_vsububs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsububs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsububs">; // // BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubh">; // // BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubhs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubhs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubhs">; // // BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubuhs : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubuhs">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubuhs">; // // BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubw">; // // BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2) // def int_hexagon_A2_vsubws : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubws">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vsubws">; // // 
BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1) // def int_hexagon_A2_vabsh : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsh">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsh">; // // BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1) // def int_hexagon_A2_vabshsat : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabshsat">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabshsat">; // // BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1) // def int_hexagon_A2_vabsw : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsw">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabsw">; // // BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1) // def int_hexagon_A2_vabswsat : -Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabswsat">; +Hexagon_di_di_Intrinsic<"HEXAGON_A2_vabswsat">; // // BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2) // def int_hexagon_M2_vabsdiffw : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffw">; // // BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2) // def int_hexagon_M2_vabsdiffh : -Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_M2_vabsdiffh">; // // BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2) // def int_hexagon_A2_vrsadub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vrsadub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vrsadub">; // // BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3) // def int_hexagon_A2_vrsadub_acc : -Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vrsadub.acc">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_A2_vrsadub_acc">; // // BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgub">; // // BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2) // def int_hexagon_A2_vavguh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguh">; // // BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgh">; 
+Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgh">; // // BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavgh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgh">; // // BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgw">; // // BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavgw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgw">; // // BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgwr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwr">; // // BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavgwr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwr">; // // BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgwcr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwcr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgwcr">; // // BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavgwcr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwcr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavgwcr">; // // BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavghcr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghcr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghcr">; // // BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavghcr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghcr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghcr">; // // BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2) // def int_hexagon_A2_vavguw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguw">; // // BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavguwr : 
-Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguwr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguwr">; // // BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavgubr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgubr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavgubr">; // // BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavguhr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguhr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavguhr">; // // BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2) // def int_hexagon_A2_vavghr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vavghr">; // // BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2) // def int_hexagon_A2_vnavghr : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghr">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vnavghr">; // // BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2) // def int_hexagon_A4_round_ri : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round.ri">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri">; // // BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2) // def int_hexagon_A4_round_rr : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round.rr">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr">; // // BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2) // def int_hexagon_A4_round_ri_sat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round.ri.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_ri_sat">; // // BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2) // def int_hexagon_A4_round_rr_sat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round.rr.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_round_rr_sat">; // // BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2) // def int_hexagon_A4_cround_ri : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround.ri">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_ri">; // // BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2) // def int_hexagon_A4_cround_rr : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround.rr">; 
+Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_cround_rr">; // // BUILTIN_INFO(HEXAGON.A4_vrminh,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrminh : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrminh">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminh">; // // BUILTIN_INFO(HEXAGON.A4_vrmaxh,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrmaxh : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrmaxh">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxh">; // // BUILTIN_INFO(HEXAGON.A4_vrminuh,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrminuh : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrminuh">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuh">; // // BUILTIN_INFO(HEXAGON.A4_vrmaxuh,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrmaxuh : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrmaxuh">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuh">; // // BUILTIN_INFO(HEXAGON.A4_vrminw,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrminw : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrminw">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminw">; // // BUILTIN_INFO(HEXAGON.A4_vrmaxw,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrmaxw : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrmaxw">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxw">; // // BUILTIN_INFO(HEXAGON.A4_vrminuw,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrminuw : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrminuw">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrminuw">; // // BUILTIN_INFO(HEXAGON.A4_vrmaxuw,DI_ftype_DIDISI,3) // def int_hexagon_A4_vrmaxuw : -Hexagon_di_didisi_Intrinsic<"HEXAGON.A4.vrmaxuw">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_A4_vrmaxuw">; // // BUILTIN_INFO(HEXAGON.A2_vminb,DI_ftype_DIDI,2) // def int_hexagon_A2_vminb : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminb">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminb">; // // BUILTIN_INFO(HEXAGON.A2_vmaxb,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxb : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxb">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxb">; // // BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2) // def 
int_hexagon_A2_vminub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminub">; // // BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxub : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxub">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxub">; // // BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2) // def int_hexagon_A2_vminh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminh">; // // BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxh">; // // BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2) // def int_hexagon_A2_vminuh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuh">; // // BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxuh : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuh">; // // BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2) // def int_hexagon_A2_vminw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminw">; // // BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxw">; // // BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2) // def int_hexagon_A2_vminuw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vminuw">; // // BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2) // def int_hexagon_A2_vmaxuw : -Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuw">; +Hexagon_di_didi_Intrinsic<"HEXAGON_A2_vmaxuw">; // // BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2) // def int_hexagon_A4_modwrapu : -Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.modwrapu">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_A4_modwrapu">; // // BUILTIN_INFO(HEXAGON.F2_sfadd,SF_ftype_SFSF,2) // 
def int_hexagon_F2_sfadd : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sfadd">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfadd">; // // BUILTIN_INFO(HEXAGON.F2_sfsub,SF_ftype_SFSF,2) // def int_hexagon_F2_sfsub : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sfsub">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfsub">; // // BUILTIN_INFO(HEXAGON.F2_sfmpy,SF_ftype_SFSF,2) // def int_hexagon_F2_sfmpy : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sfmpy">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmpy">; // // BUILTIN_INFO(HEXAGON.F2_sffma,SF_ftype_SFSFSF,3) // def int_hexagon_F2_sffma : -Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON.F2.sffma">; +Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma">; // // BUILTIN_INFO(HEXAGON.F2_sffma_sc,SF_ftype_SFSFSFQI,4) // def int_hexagon_F2_sffma_sc : -Hexagon_sf_sfsfsfqi_Intrinsic<"HEXAGON.F2.sffma.sc">; +Hexagon_sf_sfsfsfqi_Intrinsic<"HEXAGON_F2_sffma_sc">; // // BUILTIN_INFO(HEXAGON.F2_sffms,SF_ftype_SFSFSF,3) // def int_hexagon_F2_sffms : -Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON.F2.sffms">; +Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms">; // // BUILTIN_INFO(HEXAGON.F2_sffma_lib,SF_ftype_SFSFSF,3) // def int_hexagon_F2_sffma_lib : -Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON.F2.sffma.lib">; +Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffma_lib">; // // BUILTIN_INFO(HEXAGON.F2_sffms_lib,SF_ftype_SFSFSF,3) // def int_hexagon_F2_sffms_lib : -Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON.F2.sffms.lib">; +Hexagon_sf_sfsfsf_Intrinsic<"HEXAGON_F2_sffms_lib">; // // BUILTIN_INFO(HEXAGON.F2_sfcmpeq,QI_ftype_SFSF,2) // def int_hexagon_F2_sfcmpeq : -Hexagon_qi_sfsf_Intrinsic<"HEXAGON.F2.sfcmpeq">; +Hexagon_qi_sfsf_Intrinsic<"HEXAGON_F2_sfcmpeq">; // // BUILTIN_INFO(HEXAGON.F2_sfcmpgt,QI_ftype_SFSF,2) // def int_hexagon_F2_sfcmpgt : -Hexagon_qi_sfsf_Intrinsic<"HEXAGON.F2.sfcmpgt">; +Hexagon_qi_sfsf_Intrinsic<"HEXAGON_F2_sfcmpgt">; // // BUILTIN_INFO(HEXAGON.F2_sfcmpge,QI_ftype_SFSF,2) // def int_hexagon_F2_sfcmpge : -Hexagon_qi_sfsf_Intrinsic<"HEXAGON.F2.sfcmpge">; 
+Hexagon_qi_sfsf_Intrinsic<"HEXAGON_F2_sfcmpge">; // // BUILTIN_INFO(HEXAGON.F2_sfcmpuo,QI_ftype_SFSF,2) // def int_hexagon_F2_sfcmpuo : -Hexagon_qi_sfsf_Intrinsic<"HEXAGON.F2.sfcmpuo">; +Hexagon_qi_sfsf_Intrinsic<"HEXAGON_F2_sfcmpuo">; // // BUILTIN_INFO(HEXAGON.F2_sfmax,SF_ftype_SFSF,2) // def int_hexagon_F2_sfmax : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sfmax">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmax">; // // BUILTIN_INFO(HEXAGON.F2_sfmin,SF_ftype_SFSF,2) // def int_hexagon_F2_sfmin : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sfmin">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sfmin">; // // BUILTIN_INFO(HEXAGON.F2_sfclass,QI_ftype_SFSI,2) // def int_hexagon_F2_sfclass : -Hexagon_qi_sfsi_Intrinsic<"HEXAGON.F2.sfclass">; +Hexagon_qi_sfsi_Intrinsic<"HEXAGON_F2_sfclass">; // // BUILTIN_INFO(HEXAGON.F2_sfimm_p,SF_ftype_SI,1) // def int_hexagon_F2_sfimm_p : -Hexagon_sf_si_Intrinsic<"HEXAGON.F2.sfimm.p">; +Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_p">; // // BUILTIN_INFO(HEXAGON.F2_sfimm_n,SF_ftype_SI,1) // def int_hexagon_F2_sfimm_n : -Hexagon_sf_si_Intrinsic<"HEXAGON.F2.sfimm.n">; +Hexagon_sf_si_Intrinsic<"HEXAGON_F2_sfimm_n">; // // BUILTIN_INFO(HEXAGON.F2_sffixupn,SF_ftype_SFSF,2) // def int_hexagon_F2_sffixupn : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sffixupn">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupn">; // // BUILTIN_INFO(HEXAGON.F2_sffixupd,SF_ftype_SFSF,2) // def int_hexagon_F2_sffixupd : -Hexagon_sf_sfsf_Intrinsic<"HEXAGON.F2.sffixupd">; +Hexagon_sf_sfsf_Intrinsic<"HEXAGON_F2_sffixupd">; // // BUILTIN_INFO(HEXAGON.F2_sffixupr,SF_ftype_SF,1) // def int_hexagon_F2_sffixupr : -Hexagon_sf_sf_Intrinsic<"HEXAGON.F2.sffixupr">; +Hexagon_sf_sf_Intrinsic<"HEXAGON_F2_sffixupr">; // // BUILTIN_INFO(HEXAGON.F2_dfadd,DF_ftype_DFDF,2) // def int_hexagon_F2_dfadd : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dfadd">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dfadd">; // // BUILTIN_INFO(HEXAGON.F2_dfsub,DF_ftype_DFDF,2) // def int_hexagon_F2_dfsub : 
-Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dfsub">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dfsub">; // // BUILTIN_INFO(HEXAGON.F2_dfmpy,DF_ftype_DFDF,2) // def int_hexagon_F2_dfmpy : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dfmpy">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dfmpy">; // // BUILTIN_INFO(HEXAGON.F2_dffma,DF_ftype_DFDFDF,3) // def int_hexagon_F2_dffma : -Hexagon_df_dfdfdf_Intrinsic<"HEXAGON.F2.dffma">; +Hexagon_df_dfdfdf_Intrinsic<"HEXAGON_F2_dffma">; // // BUILTIN_INFO(HEXAGON.F2_dffms,DF_ftype_DFDFDF,3) // def int_hexagon_F2_dffms : -Hexagon_df_dfdfdf_Intrinsic<"HEXAGON.F2.dffms">; +Hexagon_df_dfdfdf_Intrinsic<"HEXAGON_F2_dffms">; // // BUILTIN_INFO(HEXAGON.F2_dffma_lib,DF_ftype_DFDFDF,3) // def int_hexagon_F2_dffma_lib : -Hexagon_df_dfdfdf_Intrinsic<"HEXAGON.F2.dffma.lib">; +Hexagon_df_dfdfdf_Intrinsic<"HEXAGON_F2_dffma_lib">; // // BUILTIN_INFO(HEXAGON.F2_dffms_lib,DF_ftype_DFDFDF,3) // def int_hexagon_F2_dffms_lib : -Hexagon_df_dfdfdf_Intrinsic<"HEXAGON.F2.dffms.lib">; +Hexagon_df_dfdfdf_Intrinsic<"HEXAGON_F2_dffms_lib">; // // BUILTIN_INFO(HEXAGON.F2_dffma_sc,DF_ftype_DFDFDFQI,4) // def int_hexagon_F2_dffma_sc : -Hexagon_df_dfdfdfqi_Intrinsic<"HEXAGON.F2.dffma.sc">; +Hexagon_df_dfdfdfqi_Intrinsic<"HEXAGON_F2_dffma_sc">; // // BUILTIN_INFO(HEXAGON.F2_dfmax,DF_ftype_DFDF,2) // def int_hexagon_F2_dfmax : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dfmax">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dfmax">; // // BUILTIN_INFO(HEXAGON.F2_dfmin,DF_ftype_DFDF,2) // def int_hexagon_F2_dfmin : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dfmin">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dfmin">; // // BUILTIN_INFO(HEXAGON.F2_dfcmpeq,QI_ftype_DFDF,2) // def int_hexagon_F2_dfcmpeq : -Hexagon_qi_dfdf_Intrinsic<"HEXAGON.F2.dfcmpeq">; +Hexagon_qi_dfdf_Intrinsic<"HEXAGON_F2_dfcmpeq">; // // BUILTIN_INFO(HEXAGON.F2_dfcmpgt,QI_ftype_DFDF,2) // def int_hexagon_F2_dfcmpgt : -Hexagon_qi_dfdf_Intrinsic<"HEXAGON.F2.dfcmpgt">; +Hexagon_qi_dfdf_Intrinsic<"HEXAGON_F2_dfcmpgt">; // // 
BUILTIN_INFO(HEXAGON.F2_dfcmpge,QI_ftype_DFDF,2) // def int_hexagon_F2_dfcmpge : -Hexagon_qi_dfdf_Intrinsic<"HEXAGON.F2.dfcmpge">; +Hexagon_qi_dfdf_Intrinsic<"HEXAGON_F2_dfcmpge">; // // BUILTIN_INFO(HEXAGON.F2_dfcmpuo,QI_ftype_DFDF,2) // def int_hexagon_F2_dfcmpuo : -Hexagon_qi_dfdf_Intrinsic<"HEXAGON.F2.dfcmpuo">; +Hexagon_qi_dfdf_Intrinsic<"HEXAGON_F2_dfcmpuo">; // // BUILTIN_INFO(HEXAGON.F2_dfclass,QI_ftype_DFSI,2) // def int_hexagon_F2_dfclass : -Hexagon_qi_dfsi_Intrinsic<"HEXAGON.F2.dfclass">; +Hexagon_qi_dfsi_Intrinsic<"HEXAGON_F2_dfclass">; // // BUILTIN_INFO(HEXAGON.F2_dfimm_p,DF_ftype_SI,1) // def int_hexagon_F2_dfimm_p : -Hexagon_df_si_Intrinsic<"HEXAGON.F2.dfimm.p">; +Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_p">; // // BUILTIN_INFO(HEXAGON.F2_dfimm_n,DF_ftype_SI,1) // def int_hexagon_F2_dfimm_n : -Hexagon_df_si_Intrinsic<"HEXAGON.F2.dfimm.n">; +Hexagon_df_si_Intrinsic<"HEXAGON_F2_dfimm_n">; // // BUILTIN_INFO(HEXAGON.F2_dffixupn,DF_ftype_DFDF,2) // def int_hexagon_F2_dffixupn : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dffixupn">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dffixupn">; // // BUILTIN_INFO(HEXAGON.F2_dffixupd,DF_ftype_DFDF,2) // def int_hexagon_F2_dffixupd : -Hexagon_df_dfdf_Intrinsic<"HEXAGON.F2.dffixupd">; +Hexagon_df_dfdf_Intrinsic<"HEXAGON_F2_dffixupd">; // // BUILTIN_INFO(HEXAGON.F2_dffixupr,DF_ftype_DF,1) // def int_hexagon_F2_dffixupr : -Hexagon_df_df_Intrinsic<"HEXAGON.F2.dffixupr">; +Hexagon_df_df_Intrinsic<"HEXAGON_F2_dffixupr">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2df,DF_ftype_SF,1) // def int_hexagon_F2_conv_sf2df : -Hexagon_df_sf_Intrinsic<"HEXAGON.F2.conv.sf2df">; +Hexagon_df_sf_Intrinsic<"HEXAGON_F2_conv_sf2df">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2sf,SF_ftype_DF,1) // def int_hexagon_F2_conv_df2sf : -Hexagon_sf_df_Intrinsic<"HEXAGON.F2.conv.df2sf">; +Hexagon_sf_df_Intrinsic<"HEXAGON_F2_conv_df2sf">; // // BUILTIN_INFO(HEXAGON.F2_conv_uw2sf,SF_ftype_SI,1) // def int_hexagon_F2_conv_uw2sf : 
-Hexagon_sf_si_Intrinsic<"HEXAGON.F2.conv.uw2sf">; +Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_uw2sf">; // // BUILTIN_INFO(HEXAGON.F2_conv_uw2df,DF_ftype_SI,1) // def int_hexagon_F2_conv_uw2df : -Hexagon_df_si_Intrinsic<"HEXAGON.F2.conv.uw2df">; +Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_uw2df">; // // BUILTIN_INFO(HEXAGON.F2_conv_w2sf,SF_ftype_SI,1) // def int_hexagon_F2_conv_w2sf : -Hexagon_sf_si_Intrinsic<"HEXAGON.F2.conv.w2sf">; +Hexagon_sf_si_Intrinsic<"HEXAGON_F2_conv_w2sf">; // // BUILTIN_INFO(HEXAGON.F2_conv_w2df,DF_ftype_SI,1) // def int_hexagon_F2_conv_w2df : -Hexagon_df_si_Intrinsic<"HEXAGON.F2.conv.w2df">; +Hexagon_df_si_Intrinsic<"HEXAGON_F2_conv_w2df">; // // BUILTIN_INFO(HEXAGON.F2_conv_ud2sf,SF_ftype_DI,1) // def int_hexagon_F2_conv_ud2sf : -Hexagon_sf_di_Intrinsic<"HEXAGON.F2.conv.ud2sf">; +Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_ud2sf">; // // BUILTIN_INFO(HEXAGON.F2_conv_ud2df,DF_ftype_DI,1) // def int_hexagon_F2_conv_ud2df : -Hexagon_df_di_Intrinsic<"HEXAGON.F2.conv.ud2df">; +Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_ud2df">; // // BUILTIN_INFO(HEXAGON.F2_conv_d2sf,SF_ftype_DI,1) // def int_hexagon_F2_conv_d2sf : -Hexagon_sf_di_Intrinsic<"HEXAGON.F2.conv.d2sf">; +Hexagon_sf_di_Intrinsic<"HEXAGON_F2_conv_d2sf">; // // BUILTIN_INFO(HEXAGON.F2_conv_d2df,DF_ftype_DI,1) // def int_hexagon_F2_conv_d2df : -Hexagon_df_di_Intrinsic<"HEXAGON.F2.conv.d2df">; +Hexagon_df_di_Intrinsic<"HEXAGON_F2_conv_d2df">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2uw,SI_ftype_SF,1) // def int_hexagon_F2_conv_sf2uw : -Hexagon_si_sf_Intrinsic<"HEXAGON.F2.conv.sf2uw">; +Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2w,SI_ftype_SF,1) // def int_hexagon_F2_conv_sf2w : -Hexagon_si_sf_Intrinsic<"HEXAGON.F2.conv.sf2w">; +Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2ud,DI_ftype_SF,1) // def int_hexagon_F2_conv_sf2ud : -Hexagon_di_sf_Intrinsic<"HEXAGON.F2.conv.sf2ud">; 
+Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2d,DI_ftype_SF,1) // def int_hexagon_F2_conv_sf2d : -Hexagon_di_sf_Intrinsic<"HEXAGON.F2.conv.sf2d">; +Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2uw,SI_ftype_DF,1) // def int_hexagon_F2_conv_df2uw : -Hexagon_si_df_Intrinsic<"HEXAGON.F2.conv.df2uw">; +Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2w,SI_ftype_DF,1) // def int_hexagon_F2_conv_df2w : -Hexagon_si_df_Intrinsic<"HEXAGON.F2.conv.df2w">; +Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2ud,DI_ftype_DF,1) // def int_hexagon_F2_conv_df2ud : -Hexagon_di_df_Intrinsic<"HEXAGON.F2.conv.df2ud">; +Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2d,DI_ftype_DF,1) // def int_hexagon_F2_conv_df2d : -Hexagon_di_df_Intrinsic<"HEXAGON.F2.conv.df2d">; +Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2uw_chop,SI_ftype_SF,1) // def int_hexagon_F2_conv_sf2uw_chop : -Hexagon_si_sf_Intrinsic<"HEXAGON.F2.conv.sf2uw.chop">; +Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2uw_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2w_chop,SI_ftype_SF,1) // def int_hexagon_F2_conv_sf2w_chop : -Hexagon_si_sf_Intrinsic<"HEXAGON.F2.conv.sf2w.chop">; +Hexagon_si_sf_Intrinsic<"HEXAGON_F2_conv_sf2w_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2ud_chop,DI_ftype_SF,1) // def int_hexagon_F2_conv_sf2ud_chop : -Hexagon_di_sf_Intrinsic<"HEXAGON.F2.conv.sf2ud.chop">; +Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2ud_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_sf2d_chop,DI_ftype_SF,1) // def int_hexagon_F2_conv_sf2d_chop : -Hexagon_di_sf_Intrinsic<"HEXAGON.F2.conv.sf2d.chop">; +Hexagon_di_sf_Intrinsic<"HEXAGON_F2_conv_sf2d_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2uw_chop,SI_ftype_DF,1) // def int_hexagon_F2_conv_df2uw_chop : 
-Hexagon_si_df_Intrinsic<"HEXAGON.F2.conv.df2uw.chop">; +Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2uw_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2w_chop,SI_ftype_DF,1) // def int_hexagon_F2_conv_df2w_chop : -Hexagon_si_df_Intrinsic<"HEXAGON.F2.conv.df2w.chop">; +Hexagon_si_df_Intrinsic<"HEXAGON_F2_conv_df2w_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2ud_chop,DI_ftype_DF,1) // def int_hexagon_F2_conv_df2ud_chop : -Hexagon_di_df_Intrinsic<"HEXAGON.F2.conv.df2ud.chop">; +Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2ud_chop">; // // BUILTIN_INFO(HEXAGON.F2_conv_df2d_chop,DI_ftype_DF,1) // def int_hexagon_F2_conv_df2d_chop : -Hexagon_di_df_Intrinsic<"HEXAGON.F2.conv.df2d.chop">; +Hexagon_di_df_Intrinsic<"HEXAGON_F2_conv_df2d_chop">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2) // def int_hexagon_S2_asr_r_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2) // def int_hexagon_S2_asl_r_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2) // def int_hexagon_S2_lsr_r_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.r.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_r_r">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2) // def int_hexagon_S2_lsl_r_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsl.r.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsl_r_r">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2) // def int_hexagon_S2_asr_r_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_p">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2) // def int_hexagon_S2_asl_r_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_p">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_r_p : 
-Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_p">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2) // def int_hexagon_S2_lsl_r_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_p">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_r_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_r_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_r_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_lsl_r_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_r_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_r_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_r_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsl_r_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_acc">; // // 
BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_r_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_r_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_r_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_lsl_r_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_r_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_r_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_r_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsl_r_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_r_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_and">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_r_r_and : 
-Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_and">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_r_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_and">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_lsl_r_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_and">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_r_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_r_r_or">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_r_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_r_r_or">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_r_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_r_r_or">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_lsl_r_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsl_r_r_or">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_r_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_and">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_r_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_and">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_r_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_and">; // // 
BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsl_r_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_and">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_r_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_or">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_r_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_or">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_r_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_or">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsl_r_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_or">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_p_xor,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_r_p_xor : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.xor">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_r_p_xor">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_p_xor,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_r_p_xor : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.xor">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_r_p_xor">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_p_xor,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_r_p_xor : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.xor">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_r_p_xor">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_p_xor,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsl_r_p_xor : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.xor">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsl_r_p_xor">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2) // def int_hexagon_S2_asr_r_r_sat : 
-Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_r_r_sat">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2) // def int_hexagon_S2_asl_r_r_sat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_r_r_sat">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2) // def int_hexagon_S2_asr_i_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2) // def int_hexagon_S2_lsr_i_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.i.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_lsr_i_r">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2) // def int_hexagon_S2_asl_i_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2) // def int_hexagon_S2_asr_i_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_i_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_p">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2) // def int_hexagon_S2_asl_i_p : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.p">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_p">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_i_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_i_r_acc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_i_r_acc : 
-Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.acc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_acc">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_i_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_i_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_i_p_acc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.acc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_acc">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_i_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_i_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_i_r_nac : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.nac">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_nac">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_i_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_i_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_nac">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_i_p_nac : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.nac">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_nac">; // // 
BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_i_r_xacc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.xacc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_xacc">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_i_r_xacc : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.xacc">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_xacc">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_i_p_xacc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.xacc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_i_p_xacc : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.xacc">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_xacc">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_i_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_and">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_i_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_and">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_i_r_and : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.and">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_and">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_asr_i_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asr_i_r_or">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_lsr_i_r_or : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_lsr_i_r_or">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3) // def int_hexagon_S2_asl_i_r_or : 
-Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.or">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_asl_i_r_or">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_i_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_and">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_i_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_and">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_i_p_and : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.and">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_and">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_asr_i_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asr_i_p_or">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_lsr_i_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_lsr_i_p_or">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3) // def int_hexagon_S2_asl_i_p_or : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.or">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_asl_i_p_or">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2) // def int_hexagon_S2_asl_i_r_sat : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r.sat">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asl_i_r_sat">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2) // def int_hexagon_S2_asr_i_r_rnd : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2) // def int_hexagon_S2_asr_i_r_rnd_goodsyntax : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd.goodsyntax">; 
+Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_asr_i_r_rnd_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd,DI_ftype_DISI,2) // def int_hexagon_S2_asr_i_p_rnd : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p.rnd">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_p_rnd_goodsyntax,DI_ftype_DISI,2) // def int_hexagon_S2_asr_i_p_rnd_goodsyntax : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p.rnd.goodsyntax">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S4_lsli,SI_ftype_SISI,2) // def int_hexagon_S4_lsli : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S4.lsli">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_lsli">; // // BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3) // def int_hexagon_S2_addasl_rrri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.addasl.rrri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_addasl_rrri">; // // BUILTIN_INFO(HEXAGON.S4_andi_asl_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_andi_asl_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.andi.asl.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_asl_ri">; // // BUILTIN_INFO(HEXAGON.S4_ori_asl_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_ori_asl_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.ori.asl.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_asl_ri">; // // BUILTIN_INFO(HEXAGON.S4_addi_asl_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_addi_asl_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addi.asl.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_asl_ri">; // // BUILTIN_INFO(HEXAGON.S4_subi_asl_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_subi_asl_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subi.asl.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_asl_ri">; // // BUILTIN_INFO(HEXAGON.S4_andi_lsr_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_andi_lsr_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.andi.lsr.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_andi_lsr_ri">; // // BUILTIN_INFO(HEXAGON.S4_ori_lsr_ri,SI_ftype_SISISI,3) 
// def int_hexagon_S4_ori_lsr_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.ori.lsr.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_ori_lsr_ri">; // // BUILTIN_INFO(HEXAGON.S4_addi_lsr_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_addi_lsr_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addi.lsr.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_addi_lsr_ri">; // // BUILTIN_INFO(HEXAGON.S4_subi_lsr_ri,SI_ftype_SISISI,3) // def int_hexagon_S4_subi_lsr_ri : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subi.lsr.ri">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_subi_lsr_ri">; // // BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3) // def int_hexagon_S2_valignib : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.valignib">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_valignib">; // // BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3) // def int_hexagon_S2_valignrb : -Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.valignrb">; +Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_valignrb">; // // BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3) // def int_hexagon_S2_vspliceib : -Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.vspliceib">; +Hexagon_di_didisi_Intrinsic<"HEXAGON_S2_vspliceib">; // // BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3) // def int_hexagon_S2_vsplicerb : -Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.vsplicerb">; +Hexagon_di_didiqi_Intrinsic<"HEXAGON_S2_vsplicerb">; // // BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1) // def int_hexagon_S2_vsplatrh : -Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsplatrh">; +Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsplatrh">; // // BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1) // def int_hexagon_S2_vsplatrb : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.vsplatrb">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_vsplatrb">; // // BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4) // def int_hexagon_S2_insert : -Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.insert">; +Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_insert">; // // BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4) // 
def int_hexagon_S2_tableidxb_goodsyntax : -Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxb.goodsyntax">; +Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxb_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4) // def int_hexagon_S2_tableidxh_goodsyntax : -Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxh.goodsyntax">; +Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxh_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4) // def int_hexagon_S2_tableidxw_goodsyntax : -Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxw.goodsyntax">; +Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxw_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4) // def int_hexagon_S2_tableidxd_goodsyntax : -Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxd.goodsyntax">; +Hexagon_si_sisisisi_Intrinsic<"HEXAGON_S2_tableidxd_goodsyntax">; // // BUILTIN_INFO(HEXAGON.A4_bitspliti,DI_ftype_SISI,2) // def int_hexagon_A4_bitspliti : -Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.bitspliti">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitspliti">; // // BUILTIN_INFO(HEXAGON.A4_bitsplit,DI_ftype_SISI,2) // def int_hexagon_A4_bitsplit : -Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.bitsplit">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_A4_bitsplit">; // // BUILTIN_INFO(HEXAGON.S4_extract,SI_ftype_SISISI,3) // def int_hexagon_S4_extract : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.extract">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S4_extract">; // // BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3) // def int_hexagon_S2_extractu : -Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.extractu">; +Hexagon_si_sisisi_Intrinsic<"HEXAGON_S2_extractu">; // // BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4) // def int_hexagon_S2_insertp : -Hexagon_di_didisisi_Intrinsic<"HEXAGON.S2.insertp">; +Hexagon_di_didisisi_Intrinsic<"HEXAGON_S2_insertp">; // // BUILTIN_INFO(HEXAGON.S4_extractp,DI_ftype_DISISI,3) // def int_hexagon_S4_extractp : 
-Hexagon_di_disisi_Intrinsic<"HEXAGON.S4.extractp">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_S4_extractp">; // // BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3) // def int_hexagon_S2_extractup : -Hexagon_di_disisi_Intrinsic<"HEXAGON.S2.extractup">; +Hexagon_di_disisi_Intrinsic<"HEXAGON_S2_extractup">; // // BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3) // def int_hexagon_S2_insert_rp : -Hexagon_si_sisidi_Intrinsic<"HEXAGON.S2.insert.rp">; +Hexagon_si_sisidi_Intrinsic<"HEXAGON_S2_insert_rp">; // // BUILTIN_INFO(HEXAGON.S4_extract_rp,SI_ftype_SIDI,2) // def int_hexagon_S4_extract_rp : -Hexagon_si_sidi_Intrinsic<"HEXAGON.S4.extract.rp">; +Hexagon_si_sidi_Intrinsic<"HEXAGON_S4_extract_rp">; // // BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2) // def int_hexagon_S2_extractu_rp : -Hexagon_si_sidi_Intrinsic<"HEXAGON.S2.extractu.rp">; +Hexagon_si_sidi_Intrinsic<"HEXAGON_S2_extractu_rp">; // // BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3) // def int_hexagon_S2_insertp_rp : -Hexagon_di_dididi_Intrinsic<"HEXAGON.S2.insertp.rp">; +Hexagon_di_dididi_Intrinsic<"HEXAGON_S2_insertp_rp">; // // BUILTIN_INFO(HEXAGON.S4_extractp_rp,DI_ftype_DIDI,2) // def int_hexagon_S4_extractp_rp : -Hexagon_di_didi_Intrinsic<"HEXAGON.S4.extractp.rp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S4_extractp_rp">; // // BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2) // def int_hexagon_S2_extractup_rp : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.extractup.rp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_extractup_rp">; // // BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2) // def int_hexagon_S2_tstbit_i : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.i">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_S2_tstbit_i">; // // BUILTIN_INFO(HEXAGON.S4_ntstbit_i,QI_ftype_SISI,2) // def int_hexagon_S4_ntstbit_i : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.S4.ntstbit.i">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_S4_ntstbit_i">; // // BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2) // def int_hexagon_S2_setbit_i : 
-Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.i">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_i">; // // BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2) // def int_hexagon_S2_togglebit_i : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.i">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_i">; // // BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2) // def int_hexagon_S2_clrbit_i : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.i">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_i">; // // BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2) // def int_hexagon_S2_tstbit_r : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.r">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_S2_tstbit_r">; // // BUILTIN_INFO(HEXAGON.S4_ntstbit_r,QI_ftype_SISI,2) // def int_hexagon_S4_ntstbit_r : -Hexagon_qi_sisi_Intrinsic<"HEXAGON.S4.ntstbit.r">; +Hexagon_qi_sisi_Intrinsic<"HEXAGON_S4_ntstbit_r">; // // BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2) // def int_hexagon_S2_setbit_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_setbit_r">; // // BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2) // def int_hexagon_S2_togglebit_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_togglebit_r">; // // BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2) // def int_hexagon_S2_clrbit_r : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.r">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S2_clrbit_r">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2) // def int_hexagon_S2_asr_i_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vh">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_i_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vh">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2) // def int_hexagon_S2_asl_i_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vh">; 
+Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vh">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2) // def int_hexagon_S2_asr_r_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vh">; // // BUILTIN_INFO(HEXAGON.S5_asrhub_rnd_sat_goodsyntax,SI_ftype_DISI,2) // def int_hexagon_S5_asrhub_rnd_sat_goodsyntax : -Hexagon_si_disi_Intrinsic<"HEXAGON.S5.asrhub.rnd.sat.goodsyntax">; +Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_rnd_sat_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S5_asrhub_sat,SI_ftype_DISI,2) // def int_hexagon_S5_asrhub_sat : -Hexagon_si_disi_Intrinsic<"HEXAGON.S5.asrhub.sat">; +Hexagon_si_disi_Intrinsic<"HEXAGON_S5_asrhub_sat">; // // BUILTIN_INFO(HEXAGON.S5_vasrhrnd_goodsyntax,DI_ftype_DISI,2) // def int_hexagon_S5_vasrhrnd_goodsyntax : -Hexagon_di_disi_Intrinsic<"HEXAGON.S5.vasrhrnd.goodsyntax">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S5_vasrhrnd_goodsyntax">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2) // def int_hexagon_S2_asl_r_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vh">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_r_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vh">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2) // def int_hexagon_S2_lsl_r_vh : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vh">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vh">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2) // def int_hexagon_S2_asr_i_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_i_vw">; // // BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2) // def int_hexagon_S2_asr_i_svw_trun : -Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.i.svw.trun">; +Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_i_svw_trun">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2) // def 
int_hexagon_S2_asr_r_svw_trun : -Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.r.svw.trun">; +Hexagon_si_disi_Intrinsic<"HEXAGON_S2_asr_r_svw_trun">; // // BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_i_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_i_vw">; // // BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2) // def int_hexagon_S2_asl_i_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_i_vw">; // // BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2) // def int_hexagon_S2_asr_r_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asr_r_vw">; // // BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2) // def int_hexagon_S2_asl_r_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_asl_r_vw">; // // BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2) // def int_hexagon_S2_lsr_r_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsr_r_vw">; // // BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2) // def int_hexagon_S2_lsl_r_vw : -Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vw">; +Hexagon_di_disi_Intrinsic<"HEXAGON_S2_lsl_r_vw">; // // BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1) // def int_hexagon_S2_vrndpackwh : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwh">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwh">; // // BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1) // def int_hexagon_S2_vrndpackwhs : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwhs">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vrndpackwhs">; // // BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1) // def int_hexagon_S2_vsxtbh : -Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxtbh">; +Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxtbh">; // // BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1) // def int_hexagon_S2_vzxtbh : -Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxtbh">; 
+Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxtbh">; // // BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1) // def int_hexagon_S2_vsathub : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathub">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathub">; // // BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1) // def int_hexagon_S2_svsathub : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathub">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathub">; // // BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1) // def int_hexagon_S2_svsathb : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathb">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_svsathb">; // // BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1) // def int_hexagon_S2_vsathb : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathb">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsathb">; // // BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1) // def int_hexagon_S2_vtrunohb : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunohb">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunohb">; // // BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2) // def int_hexagon_S2_vtrunewh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunewh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunewh">; // // BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2) // def int_hexagon_S2_vtrunowh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunowh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_vtrunowh">; // // BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1) // def int_hexagon_S2_vtrunehb : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunehb">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vtrunehb">; // // BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1) // def int_hexagon_S2_vsxthw : -Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxthw">; +Hexagon_di_si_Intrinsic<"HEXAGON_S2_vsxthw">; // // BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1) // def int_hexagon_S2_vzxthw : -Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxthw">; +Hexagon_di_si_Intrinsic<"HEXAGON_S2_vzxthw">; // // BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1) // def int_hexagon_S2_vsatwh : 
-Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwh">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwh">; // // BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1) // def int_hexagon_S2_vsatwuh : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwuh">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_vsatwuh">; // // BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2) // def int_hexagon_S2_packhl : -Hexagon_di_sisi_Intrinsic<"HEXAGON.S2.packhl">; +Hexagon_di_sisi_Intrinsic<"HEXAGON_S2_packhl">; // // BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1) // def int_hexagon_A2_swiz : -Hexagon_si_si_Intrinsic<"HEXAGON.A2.swiz">; +Hexagon_si_si_Intrinsic<"HEXAGON_A2_swiz">; // // BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1) // def int_hexagon_S2_vsathub_nopack : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathub.nopack">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathub_nopack">; // // BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1) // def int_hexagon_S2_vsathb_nopack : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathb.nopack">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsathb_nopack">; // // BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1) // def int_hexagon_S2_vsatwh_nopack : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwh.nopack">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwh_nopack">; // // BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1) // def int_hexagon_S2_vsatwuh_nopack : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwuh.nopack">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_vsatwuh_nopack">; // // BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2) // def int_hexagon_S2_shuffob : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffob">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffob">; // // BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2) // def int_hexagon_S2_shuffeb : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeb">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeb">; // // BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2) // def int_hexagon_S2_shuffoh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffoh">; 
+Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffoh">; // // BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2) // def int_hexagon_S2_shuffeh : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeh">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_shuffeh">; // // BUILTIN_INFO(HEXAGON.S5_popcountp,SI_ftype_DI,1) // def int_hexagon_S5_popcountp : -Hexagon_si_di_Intrinsic<"HEXAGON.S5.popcountp">; +Hexagon_si_di_Intrinsic<"HEXAGON_S5_popcountp">; // // BUILTIN_INFO(HEXAGON.S4_parity,SI_ftype_SISI,2) // def int_hexagon_S4_parity : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S4.parity">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_parity">; // // BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2) // def int_hexagon_S2_parityp : -Hexagon_si_didi_Intrinsic<"HEXAGON.S2.parityp">; +Hexagon_si_didi_Intrinsic<"HEXAGON_S2_parityp">; // // BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2) // def int_hexagon_S2_lfsp : -Hexagon_di_didi_Intrinsic<"HEXAGON.S2.lfsp">; +Hexagon_di_didi_Intrinsic<"HEXAGON_S2_lfsp">; // // BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1) // def int_hexagon_S2_clbnorm : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.clbnorm">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_clbnorm">; // // BUILTIN_INFO(HEXAGON.S4_clbaddi,SI_ftype_SISI,2) // def int_hexagon_S4_clbaddi : -Hexagon_si_sisi_Intrinsic<"HEXAGON.S4.clbaddi">; +Hexagon_si_sisi_Intrinsic<"HEXAGON_S4_clbaddi">; // // BUILTIN_INFO(HEXAGON.S4_clbpnorm,SI_ftype_DI,1) // def int_hexagon_S4_clbpnorm : -Hexagon_si_di_Intrinsic<"HEXAGON.S4.clbpnorm">; +Hexagon_si_di_Intrinsic<"HEXAGON_S4_clbpnorm">; // // BUILTIN_INFO(HEXAGON.S4_clbpaddi,SI_ftype_DISI,2) // def int_hexagon_S4_clbpaddi : -Hexagon_si_disi_Intrinsic<"HEXAGON.S4.clbpaddi">; +Hexagon_si_disi_Intrinsic<"HEXAGON_S4_clbpaddi">; // // BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1) // def int_hexagon_S2_clb : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.clb">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_clb">; // // BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1) // def int_hexagon_S2_cl0 : 
-Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl0">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl0">; // // BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1) // def int_hexagon_S2_cl1 : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl1">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_cl1">; // // BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1) // def int_hexagon_S2_clbp : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.clbp">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_clbp">; // // BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1) // def int_hexagon_S2_cl0p : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl0p">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl0p">; // // BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1) // def int_hexagon_S2_cl1p : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl1p">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_cl1p">; // // BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1) // def int_hexagon_S2_brev : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.brev">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_brev">; // // BUILTIN_INFO(HEXAGON.S2_brevp,DI_ftype_DI,1) // def int_hexagon_S2_brevp : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.brevp">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_brevp">; // // BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1) // def int_hexagon_S2_ct0 : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct0">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct0">; // // BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1) // def int_hexagon_S2_ct1 : -Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct1">; +Hexagon_si_si_Intrinsic<"HEXAGON_S2_ct1">; // // BUILTIN_INFO(HEXAGON.S2_ct0p,SI_ftype_DI,1) // def int_hexagon_S2_ct0p : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.ct0p">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct0p">; // // BUILTIN_INFO(HEXAGON.S2_ct1p,SI_ftype_DI,1) // def int_hexagon_S2_ct1p : -Hexagon_si_di_Intrinsic<"HEXAGON.S2.ct1p">; +Hexagon_si_di_Intrinsic<"HEXAGON_S2_ct1p">; // // BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1) // def int_hexagon_S2_interleave : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.interleave">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">; // // 
BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1) // def int_hexagon_S2_deinterleave : -Hexagon_di_di_Intrinsic<"HEXAGON.S2.deinterleave">; +Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">; diff --git a/include/llvm/IntrinsicsMips.td b/include/llvm/IntrinsicsMips.td new file mode 100644 index 0000000000..e260a37243 --- /dev/null +++ b/include/llvm/IntrinsicsMips.td @@ -0,0 +1,286 @@ +//===- IntrinsicsMips.td - Defines Mips intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the MIPS-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MIPS DSP data types +def mips_v2q15_ty: LLVMType<v2i16>; +def mips_q31_ty: LLVMType<i32>; + +let TargetPrefix = "mips" in { // All intrinsics start with "llvm.mips.". 
+ +//===----------------------------------------------------------------------===// +// Addition/subtraction + +def int_mips_addu_qb : GCCBuiltin<"__builtin_mips_addu_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_addu_s_qb : GCCBuiltin<"__builtin_mips_addu_s_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_subu_qb : GCCBuiltin<"__builtin_mips_subu_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_subu_s_qb : GCCBuiltin<"__builtin_mips_subu_s_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; + +def int_mips_addq_ph : GCCBuiltin<"__builtin_mips_addq_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_addq_s_ph : GCCBuiltin<"__builtin_mips_addq_s_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_subq_ph : GCCBuiltin<"__builtin_mips_subq_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_subq_s_ph : GCCBuiltin<"__builtin_mips_subq_s_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; + +def int_mips_madd: GCCBuiltin<"__builtin_mips_madd">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; +def int_mips_maddu: GCCBuiltin<"__builtin_mips_maddu">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; + +def int_mips_msub: GCCBuiltin<"__builtin_mips_msub">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_mips_msubu: GCCBuiltin<"__builtin_mips_msubu">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_mips_addq_s_w: GCCBuiltin<"__builtin_mips_addq_s_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], + [IntrNoMem, 
Commutative]>; +def int_mips_subq_s_w: GCCBuiltin<"__builtin_mips_subq_s_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; + +def int_mips_addsc: GCCBuiltin<"__builtin_mips_addsc">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; +def int_mips_addwc: GCCBuiltin<"__builtin_mips_addwc">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; + +def int_mips_modsub: GCCBuiltin<"__builtin_mips_modsub">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_mips_raddu_w_qb: GCCBuiltin<"__builtin_mips_raddu_w_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Absolute value + +def int_mips_absq_s_ph: GCCBuiltin<"__builtin_mips_absq_s_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty], [IntrNoMem]>; +def int_mips_absq_s_w: GCCBuiltin<"__builtin_mips_absq_s_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Precision reduce/expand + +def int_mips_precrq_qb_ph: GCCBuiltin<"__builtin_mips_precrq_qb_ph">, + Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_precrqu_s_qb_ph: GCCBuiltin<"__builtin_mips_precrqu_s_qb_ph">, + Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_precrq_ph_w: GCCBuiltin<"__builtin_mips_precrq_ph_w">, + Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; +def int_mips_precrq_rs_ph_w: GCCBuiltin<"__builtin_mips_precrq_rs_ph_w">, + Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; +def int_mips_preceq_w_phl: GCCBuiltin<"__builtin_mips_preceq_w_phl">, + Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>; +def int_mips_preceq_w_phr: GCCBuiltin<"__builtin_mips_preceq_w_phr">, + Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>; +def 
int_mips_precequ_ph_qbl: GCCBuiltin<"__builtin_mips_precequ_ph_qbl">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_precequ_ph_qbr: GCCBuiltin<"__builtin_mips_precequ_ph_qbr">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_precequ_ph_qbla: GCCBuiltin<"__builtin_mips_precequ_ph_qbla">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_precequ_ph_qbra: GCCBuiltin<"__builtin_mips_precequ_ph_qbra">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_preceu_ph_qbl: GCCBuiltin<"__builtin_mips_preceu_ph_qbl">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_preceu_ph_qbr: GCCBuiltin<"__builtin_mips_preceu_ph_qbr">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_preceu_ph_qbla: GCCBuiltin<"__builtin_mips_preceu_ph_qbla">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_preceu_ph_qbra: GCCBuiltin<"__builtin_mips_preceu_ph_qbra">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Shift + +def int_mips_shll_qb: GCCBuiltin<"__builtin_mips_shll_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shrl_qb: GCCBuiltin<"__builtin_mips_shrl_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shll_ph: GCCBuiltin<"__builtin_mips_shll_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shll_s_ph: GCCBuiltin<"__builtin_mips_shll_s_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shra_ph: GCCBuiltin<"__builtin_mips_shra_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shra_r_ph: GCCBuiltin<"__builtin_mips_shra_r_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shll_s_w: 
GCCBuiltin<"__builtin_mips_shll_s_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shra_r_w: GCCBuiltin<"__builtin_mips_shra_r_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shilo: GCCBuiltin<"__builtin_mips_shilo">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Multiplication + +def int_mips_muleu_s_ph_qbl: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbl">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_muleu_s_ph_qbr: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbr">, + Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_mulq_rs_ph: GCCBuiltin<"__builtin_mips_mulq_rs_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_muleq_s_w_phl: GCCBuiltin<"__builtin_mips_muleq_s_w_phl">, + Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_muleq_s_w_phr: GCCBuiltin<"__builtin_mips_muleq_s_w_phr">, + Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_mulsaq_s_w_ph: GCCBuiltin<"__builtin_mips_mulsaq_s_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_maq_s_w_phl: GCCBuiltin<"__builtin_mips_maq_s_w_phl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_maq_s_w_phr: GCCBuiltin<"__builtin_mips_maq_s_w_phr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_maq_sa_w_phl: GCCBuiltin<"__builtin_mips_maq_sa_w_phl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_maq_sa_w_phr: GCCBuiltin<"__builtin_mips_maq_sa_w_phr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, 
mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_mult: GCCBuiltin<"__builtin_mips_mult">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; +def int_mips_multu: GCCBuiltin<"__builtin_mips_multu">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; + +//===----------------------------------------------------------------------===// +// Dot product with accumulate/subtract + +def int_mips_dpau_h_qbl: GCCBuiltin<"__builtin_mips_dpau_h_qbl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem]>; +def int_mips_dpau_h_qbr: GCCBuiltin<"__builtin_mips_dpau_h_qbr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem]>; +def int_mips_dpsu_h_qbl: GCCBuiltin<"__builtin_mips_dpsu_h_qbl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem]>; +def int_mips_dpsu_h_qbr: GCCBuiltin<"__builtin_mips_dpsu_h_qbr">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem]>; +def int_mips_dpaq_s_w_ph: GCCBuiltin<"__builtin_mips_dpaq_s_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_dpsq_s_w_ph: GCCBuiltin<"__builtin_mips_dpsq_s_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem]>; +def int_mips_dpaq_sa_l_w: GCCBuiltin<"__builtin_mips_dpaq_sa_l_w">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], + [IntrNoMem]>; +def int_mips_dpsq_sa_l_w: GCCBuiltin<"__builtin_mips_dpsq_sa_l_w">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], + [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// Comparison + +def int_mips_cmpu_eq_qb: GCCBuiltin<"__builtin_mips_cmpu_eq_qb">, + Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>; +def int_mips_cmpu_lt_qb: GCCBuiltin<"__builtin_mips_cmpu_lt_qb">, + 
Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>; +def int_mips_cmpu_le_qb: GCCBuiltin<"__builtin_mips_cmpu_le_qb">, + Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>; +def int_mips_cmpgu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgu_eq_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_cmpgu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgu_lt_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_cmpgu_le_qb: GCCBuiltin<"__builtin_mips_cmpgu_le_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_cmp_eq_ph: GCCBuiltin<"__builtin_mips_cmp_eq_ph">, + Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>; +def int_mips_cmp_lt_ph: GCCBuiltin<"__builtin_mips_cmp_lt_ph">, + Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>; +def int_mips_cmp_le_ph: GCCBuiltin<"__builtin_mips_cmp_le_ph">, + Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>; + +//===----------------------------------------------------------------------===// +// Extracting + +def int_mips_extr_s_h: GCCBuiltin<"__builtin_mips_extr_s_h">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_extr_w: GCCBuiltin<"__builtin_mips_extr_w">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_extr_rs_w: GCCBuiltin<"__builtin_mips_extr_rs_w">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_extr_r_w: GCCBuiltin<"__builtin_mips_extr_r_w">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_extp: GCCBuiltin<"__builtin_mips_extp">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">, + Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + 
+//===----------------------------------------------------------------------===// +// Misc + +def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_bitrev: GCCBuiltin<"__builtin_mips_bitrev">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_mips_packrl_ph: GCCBuiltin<"__builtin_mips_packrl_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; + +def int_mips_repl_qb: GCCBuiltin<"__builtin_mips_repl_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_mips_repl_ph: GCCBuiltin<"__builtin_mips_repl_ph">, + Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_mips_pick_qb: GCCBuiltin<"__builtin_mips_pick_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_pick_ph: GCCBuiltin<"__builtin_mips_pick_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; + +def int_mips_mthlip: GCCBuiltin<"__builtin_mips_mthlip">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_mips_bposge32: GCCBuiltin<"__builtin_mips_bposge32">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_mips_lbux: GCCBuiltin<"__builtin_mips_lbux">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>; +def int_mips_lhx: GCCBuiltin<"__builtin_mips_lhx">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>; +def int_mips_lwx: GCCBuiltin<"__builtin_mips_lwx">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>; +} diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index fe53b0aaf8..14fd76d213 100644 --- a/include/llvm/IntrinsicsX86.td +++ 
b/include/llvm/IntrinsicsX86.td @@ -1744,6 +1744,75 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem]>; } +// Gather ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrReadMem]>; + + def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty], + 
[IntrReadMem]>; + def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadMem]>; + def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadMem]>; +} + // Misc. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">, diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 619b4939f2..03da47c2ce 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -323,17 +323,9 @@ namespace llvm { /// DwarfSectionOffsetDirective - Special section offset directive. 
const char* DwarfSectionOffsetDirective; // Defaults to NULL - /// DwarfRequiresRelocationForSectionOffset - True if we need to produce a - /// relocation when we want a section offset in dwarf. - bool DwarfRequiresRelocationForSectionOffset; // Defaults to true; - - /// DwarfUsesLabelOffsetDifference - True if Dwarf2 output can - /// use EmitLabelOffsetDifference. - bool DwarfUsesLabelOffsetForRanges; - - /// DwarfUsesRelocationsForStringPool - True if this Dwarf output must use - /// relocations to refer to entries in the string pool. - bool DwarfUsesRelocationsForStringPool; + /// DwarfUsesRelocationsAcrossSections - True if Dwarf2 output generally + /// uses relocations for references to other .debug_* sections. + bool DwarfUsesRelocationsAcrossSections; /// DwarfRegNumForCFI - True if dwarf register numbers are printed /// instead of symbolic register names in .cfi_* directives. @@ -564,14 +556,8 @@ namespace llvm { const char *getDwarfSectionOffsetDirective() const { return DwarfSectionOffsetDirective; } - bool doesDwarfRequireRelocationForSectionOffset() const { - return DwarfRequiresRelocationForSectionOffset; - } - bool doesDwarfUseLabelOffsetForRanges() const { - return DwarfUsesLabelOffsetForRanges; - } - bool doesDwarfUseRelocationsForStringPool() const { - return DwarfUsesRelocationsForStringPool; + bool doesDwarfUseRelocationsAcrossSections() const { + return DwarfUsesRelocationsAcrossSections; } bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 5718feca88..688c8a9575 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -54,11 +54,13 @@ class MCELFObjectTargetWriter { const uint16_t EMachine; const unsigned HasRelocationAddend : 1; const unsigned Is64Bit : 1; + const unsigned IsN64 : 1; protected: MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, - uint16_t EMachine_, bool HasRelocationAddend_); + 
uint16_t EMachine_, bool HasRelocationAddend, + bool IsN64=false); public: static uint8_t getOSABI(Triple::OSType OSType) { @@ -101,7 +103,47 @@ public: uint16_t getEMachine() { return EMachine; } bool hasRelocationAddend() { return HasRelocationAddend; } bool is64Bit() const { return Is64Bit; } + bool isN64() const { return IsN64; } /// @} + + // Instead of changing everyone's API we pack the N64 Type fields + // into the existing 32 bit data unsigned. +#define R_TYPE_SHIFT 0 +#define R_TYPE_MASK 0xffffff00 +#define R_TYPE2_SHIFT 8 +#define R_TYPE2_MASK 0xffff00ff +#define R_TYPE3_SHIFT 16 +#define R_TYPE3_MASK 0xff00ffff +#define R_SSYM_SHIFT 24 +#define R_SSYM_MASK 0x00ffffff + + // N64 relocation type accessors + unsigned getRType(uint32_t Type) const { + return (unsigned)((Type >> R_TYPE_SHIFT) & 0xff); + } + unsigned getRType2(uint32_t Type) const { + return (unsigned)((Type >> R_TYPE2_SHIFT) & 0xff); + } + unsigned getRType3(uint32_t Type) const { + return (unsigned)((Type >> R_TYPE3_SHIFT) & 0xff); + } + unsigned getRSsym(uint32_t Type) const { + return (unsigned)((Type >> R_SSYM_SHIFT) & 0xff); + } + + // N64 relocation type setting + unsigned setRType(unsigned Value, unsigned Type) const { + return ((Type & R_TYPE_MASK) | ((Value & 0xff) << R_TYPE_SHIFT)); + } + unsigned setRType2(unsigned Value, unsigned Type) const { + return (Type & R_TYPE2_MASK) | ((Value & 0xff) << R_TYPE2_SHIFT); + } + unsigned setRType3(unsigned Value, unsigned Type) const { + return (Type & R_TYPE3_MASK) | ((Value & 0xff) << R_TYPE3_SHIFT); + } + unsigned setRSsym(unsigned Value, unsigned Type) const { + return (Type & R_SSYM_MASK) | ((Value & 0xff) << R_SSYM_SHIFT); + } }; /// \brief Construct a new ELF writer instance. 
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h index 05baddd918..d8587068ae 100644 --- a/include/llvm/MC/MCInstrItineraries.h +++ b/include/llvm/MC/MCInstrItineraries.h @@ -95,7 +95,7 @@ struct InstrStage { /// operands are read and written. /// struct InstrItinerary { - unsigned NumMicroOps; ///< # of micro-ops, 0 means it's variable + int NumMicroOps; ///< # of micro-ops, -1 means it's variable unsigned FirstStage; ///< Index of first stage in itinerary unsigned LastStage; ///< Index of last + 1 stage in itinerary unsigned FirstOperandCycle; ///< Index of first operand rd/wr @@ -313,16 +313,16 @@ public: return UseCycle; } - /// isMicroCoded - Return true if the instructions in the given class decode - /// to more than one micro-ops. - bool isMicroCoded(unsigned ItinClassIndx) const { + /// getNumMicroOps - Return the number of micro-ops that the given class + /// decodes to. Return -1 for classes that require dynamic lookup via + /// TargetInstrInfo. + int getNumMicroOps(unsigned ItinClassIndx) const { if (isEmpty()) - return false; - return Itineraries[ItinClassIndx].NumMicroOps != 1; + return 1; + return Itineraries[ItinClassIndx].NumMicroOps; } }; - } // End llvm namespace #endif diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 99736c92a5..970bf4626b 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -326,7 +326,7 @@ namespace llvm { /// @param ByteAlignment - The alignment of the zerofill symbol if /// non-zero. This must be a power of 2 on some targets. virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0,unsigned ByteAlignment = 0) = 0; + uint64_t Size = 0,unsigned ByteAlignment = 0) = 0; /// EmitTBSSSymbol - Emit a thread local bss (.tbss) symbol. 
/// diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h index 73579861ec..b40549bed6 100644 --- a/include/llvm/Metadata.h +++ b/include/llvm/Metadata.h @@ -165,6 +165,11 @@ public: static bool classof(const Value *V) { return V->getValueID() == MDNodeVal; } + + /// Methods for metadata merging. + static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B); + static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B); + static MDNode *getMostGenericRange(MDNode *A, MDNode *B); private: // destroy - Delete this node. Only when there are no uses. void destroy(); diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 68b5ca1bc7..967420ec9f 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -168,6 +168,10 @@ public: virtual section_iterator begin_sections() const; virtual section_iterator end_sections() const; + const coff_section *getCOFFSection(section_iterator &It) const; + const coff_symbol *getCOFFSymbol(symbol_iterator &It) const; + const coff_relocation *getCOFFRelocation(relocation_iterator &It) const; + virtual uint8_t getBytesInAddress() const; virtual StringRef getFileFormatName() const; virtual unsigned getArch() const; @@ -184,6 +188,8 @@ public: return ec; } error_code getSymbolName(const coff_symbol *symbol, StringRef &Res) const; + ArrayRef<uint8_t> getSymbolAuxData(const coff_symbol *symbol) const; + error_code getSectionName(const coff_section *Sec, StringRef &Res) const; error_code getSectionContents(const coff_section *Sec, ArrayRef<uint8_t> &Res) const; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 4e6f50d97a..cafcb5ed3d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -1414,6 +1414,98 @@ error_code ELFObjectFile<target_endianness, is64Bits> res = "Unknown"; } break; + case ELF::EM_HEXAGON: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B32_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_12_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_10_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_9_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_7_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_JMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_PLT_B22_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_HI16); 
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPMOD_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_PLT_B22_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32_6_X); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_11_X); + default: + res = "Unknown"; + } + break; default: res = "Unknown"; } @@ -1489,6 +1581,9 @@ error_code ELFObjectFile<target_endianness, is64Bits> res = "Unknown"; } break; + case ELF::EM_HEXAGON: + res = symname; + break; default: res = "Unknown"; } @@ -1888,6 +1983,8 @@ StringRef ELFObjectFile<target_endianness, is64Bits> return "ELF32-x86-64"; case ELF::EM_ARM: return "ELF32-arm"; + case ELF::EM_HEXAGON: + return "ELF32-hexagon"; default: return "ELF32-unknown"; } @@ -1915,6 +2012,8 @@ unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const { return Triple::x86_64; case ELF::EM_ARM: return Triple::arm; + case ELF::EM_HEXAGON: + return Triple::hexagon; default: return Triple::UnknownArch; } diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 4dd7fb5813..2ec656b012 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -76,13 +76,13 @@ public: } }; -static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { +inline bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { // Check bitwise identical. This is the only legal way to compare a union w/o // knowing which member is in use. return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0; } -static bool operator <(const DataRefImpl &a, const DataRefImpl &b) { +inline bool operator <(const DataRefImpl &a, const DataRefImpl &b) { // Check bitwise identical. This is the only legal way to compare a union w/o // knowing which member is in use. 
return std::memcmp(&a, &b, sizeof(DataRefImpl)) < 0; @@ -126,6 +126,8 @@ public: /// /// This is for display purposes only. error_code getValueString(SmallVectorImpl<char> &Result) const; + + DataRefImpl getRawDataRefImpl() const; }; typedef content_iterator<RelocationRef> relocation_iterator; @@ -570,6 +572,11 @@ inline error_code RelocationRef::getValueString(SmallVectorImpl<char> &Result) inline error_code RelocationRef::getHidden(bool &Result) const { return OwningObject->getRelocationHidden(RelocationPimpl, Result); } + +inline DataRefImpl RelocationRef::getRawDataRefImpl() const { + return RelocationPimpl; +} + // Inline function definitions. inline LibraryRef::LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner) : LibraryPimpl(LibraryP) diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index cebfa7982d..85607c8448 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -15,6 +15,9 @@ #ifndef LLVM_SUPPORT_ALIGNOF_H #define LLVM_SUPPORT_ALIGNOF_H +#include "llvm/Support/Compiler.h" +#include <cstddef> + namespace llvm { template <typename T> @@ -54,7 +57,94 @@ struct AlignOf { /// class besides some cosmetic cleanliness. Example usage: /// alignOf<int>() returns the alignment of an int. template <typename T> -static inline unsigned alignOf() { return AlignOf<T>::Alignment; } +inline unsigned alignOf() { return AlignOf<T>::Alignment; } + + +/// \brief Helper for building an aligned character array type. +/// +/// This template is used to explicitly build up a collection of aligned +/// character types. We have to build these up using a macro and explicit +/// specialization to cope with old versions of MSVC and GCC where only an +/// integer literal can be used to specify an alignment constraint. Once built +/// up here, we can then begin to indirect between these using normal C++ +/// template parameters. 
+template <size_t Alignment> struct AlignedCharArrayImpl {}; +template <> struct AlignedCharArrayImpl<0> { + typedef char type; +}; +#if __has_feature(cxx_alignas) +#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ + template <> struct AlignedCharArrayImpl<x> { \ + typedef char alignas(x) type; \ + } +#elif defined(__clang__) || defined(__GNUC__) +#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ + template <> struct AlignedCharArrayImpl<x> { \ + typedef char type __attribute__((aligned(x))); \ + } +#elif defined(_MSC_VER) +#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ + template <> struct AlignedCharArrayImpl<x> { \ + typedef __declspec(align(x)) char type; \ + } +#else +# error No supported align as directive. +#endif + +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(512); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1024); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2048); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4096); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192); +// Any larger and MSVC complains. +#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT + +/// \brief This class template exposes a typedef for type containing a suitable +/// aligned character array to hold elements of any of up to four types. +/// +/// These types may be arrays, structs, or any other types. The goal is to +/// produce a union type containing a character array which, when used, forms +/// storage suitable to placement new any of these types over. Support for more +/// than four types can be added at the cost of more boiler plate. 
+template <typename T1, + typename T2 = char, typename T3 = char, typename T4 = char> +class AlignedCharArray { + class AlignerImpl { + T1 t1; T2 t2; T3 t3; T4 t4; + + AlignerImpl(); // Never defined or instantiated. + }; + union SizerImpl { + char arr1[sizeof(T1)], arr2[sizeof(T2)], arr3[sizeof(T3)], arr4[sizeof(T4)]; + }; + +public: + // Sadly, Clang and GCC both fail to align a character array properly even + // with an explicit alignment attribute. To work around this, we union + // the character array that will actually be used with a struct that contains + // a single aligned character member. Tests seem to indicate that both Clang + // and GCC will properly register the alignment of a struct containing an + // aligned member, and this alignment should carry over to the character + // array in the union. + union union_type { + // This is the only member of the union which should be used by clients: + char buffer[sizeof(SizerImpl)]; + + // This member of the union only exists to force the alignment. + struct { + typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type + nonce_inner_member; + } nonce_member; + }; +}; } // end namespace llvm #endif diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index 20634ede76..c23bb6a97d 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -184,6 +184,11 @@ public: CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); } + /// \brief Return true if this function has the given attribute. + bool hasFnAttr(Attributes N) const { + CALLSITE_DELEGATE_GETTER(hasFnAttr(N)); + } + /// paramHasAttr - whether the call or the callee has the given attribute. 
bool paramHasAttr(uint16_t i, Attributes attr) const { CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr)); diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h index ced3a2cf2d..90dd69fa47 100644 --- a/include/llvm/Support/ConstantRange.h +++ b/include/llvm/Support/ConstantRange.h @@ -155,6 +155,10 @@ public: /// constant range. ConstantRange subtract(const APInt &CI) const; + /// \brief Subtract the specified range from this range (aka relative + /// complement of the sets). + ConstantRange difference(const ConstantRange &CR) const; + /// intersectWith - Return the range that results from the intersection of /// this range with another range. The resultant range is guaranteed to /// include all elements contained in both input ranges, and to have the diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 9373958648..25945cb3ec 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -675,6 +675,97 @@ enum { R_MIPS_NUM = 218 }; +// ELF Relocation types for Hexagon +// Release 5 ABI - Document: 80-V9418-3 Rev. 
J +enum { + R_HEX_NONE = 0, + R_HEX_B22_PCREL = 1, + R_HEX_B15_PCREL = 2, + R_HEX_B7_PCREL = 3, + R_HEX_LO16 = 4, + R_HEX_HI16 = 5, + R_HEX_32 = 6, + R_HEX_16 = 7, + R_HEX_8 = 8, + R_HEX_GPREL16_0 = 9, + R_HEX_GPREL16_1 = 10, + R_HEX_GPREL16_2 = 11, + R_HEX_GPREL16_3 = 12, + R_HEX_HL16 = 13, + R_HEX_B13_PCREL = 14, + R_HEX_B9_PCREL = 15, + R_HEX_B32_PCREL_X = 16, + R_HEX_32_6_X = 17, + R_HEX_B22_PCREL_X = 18, + R_HEX_B15_PCREL_X = 19, + R_HEX_B13_PCREL_X = 20, + R_HEX_B9_PCREL_X = 21, + R_HEX_B7_PCREL_X = 22, + R_HEX_16_X = 23, + R_HEX_12_X = 24, + R_HEX_11_X = 25, + R_HEX_10_X = 26, + R_HEX_9_X = 27, + R_HEX_8_X = 28, + R_HEX_7_X = 29, + R_HEX_6_X = 30, + R_HEX_32_PCREL = 31, + R_HEX_COPY = 32, + R_HEX_GLOB_DAT = 33, + R_HEX_JMP_SLOT = 34, + R_HEX_RELATIVE = 35, + R_HEX_PLT_B22_PCREL = 36, + R_HEX_GOTREL_LO16 = 37, + R_HEX_GOTREL_HI16 = 38, + R_HEX_GOTREL_32 = 39, + R_HEX_GOT_LO16 = 40, + R_HEX_GOT_HI16 = 41, + R_HEX_GOT_32 = 42, + R_HEX_GOT_16 = 43, + R_HEX_DTPMOD_32 = 44, + R_HEX_DTPREL_LO16 = 45, + R_HEX_DTPREL_HI16 = 46, + R_HEX_DTPREL_32 = 47, + R_HEX_DTPREL_16 = 48, + R_HEX_GD_PLT_B22_PCREL = 49, + R_HEX_GD_GOT_LO16 = 50, + R_HEX_GD_GOT_HI16 = 51, + R_HEX_GD_GOT_32 = 52, + R_HEX_GD_GOT_16 = 53, + R_HEX_IE_LO16 = 54, + R_HEX_IE_HI16 = 55, + R_HEX_IE_32 = 56, + R_HEX_IE_GOT_LO16 = 57, + R_HEX_IE_GOT_HI16 = 58, + R_HEX_IE_GOT_32 = 59, + R_HEX_IE_GOT_16 = 60, + R_HEX_TPREL_LO16 = 61, + R_HEX_TPREL_HI16 = 62, + R_HEX_TPREL_32 = 63, + R_HEX_TPREL_16 = 64, + R_HEX_6_PCREL_X = 65, + R_HEX_GOTREL_32_6_X = 66, + R_HEX_GOTREL_16_X = 67, + R_HEX_GOTREL_11_X = 68, + R_HEX_GOT_32_6_X = 69, + R_HEX_GOT_16_X = 70, + R_HEX_GOT_11_X = 71, + R_HEX_DTPREL_32_6_X = 72, + R_HEX_DTPREL_16_X = 73, + R_HEX_DTPREL_11_X = 74, + R_HEX_GD_GOT_32_6_X = 75, + R_HEX_GD_GOT_16_X = 76, + R_HEX_GD_GOT_11_X = 77, + R_HEX_IE_32_6_X = 78, + R_HEX_IE_16_X = 79, + R_HEX_IE_GOT_32_6_X = 80, + R_HEX_IE_GOT_16_X = 81, + R_HEX_IE_GOT_11_X = 82, + R_HEX_TPREL_32_6_X = 83, + R_HEX_TPREL_16_X = 84, + 
R_HEX_TPREL_11_X = 85 +}; + // Section header. struct Elf32_Shdr { Elf32_Word sh_name; // Section name (index into string table) diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index 733ab7548f..8d5649dc1f 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -49,7 +49,7 @@ struct alignment_access_helper<value_type, unaligned> namespace endian { template<typename value_type, alignment align> - static value_type read_le(const void *memory) { + inline value_type read_le(const void *memory) { value_type t = reinterpret_cast<const detail::alignment_access_helper <value_type, align> *>(memory)->val; @@ -59,7 +59,7 @@ namespace endian { } template<typename value_type, alignment align> - static void write_le(void *memory, value_type value) { + inline void write_le(void *memory, value_type value) { if (sys::isBigEndianHost()) value = sys::SwapByteOrder(value); reinterpret_cast<detail::alignment_access_helper<value_type, align> *> @@ -67,7 +67,7 @@ namespace endian { } template<typename value_type, alignment align> - static value_type read_be(const void *memory) { + inline value_type read_be(const void *memory) { value_type t = reinterpret_cast<const detail::alignment_access_helper <value_type, align> *>(memory)->val; @@ -77,7 +77,7 @@ namespace endian { } template<typename value_type, alignment align> - static void write_be(void *memory, value_type value) { + inline void write_be(void *memory, value_type value) { if (sys::isLittleEndianHost()) value = sys::SwapByteOrder(value); reinterpret_cast<detail::alignment_access_helper<value_type, align> *> diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 4bee20549c..4eb75c47bc 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -94,6 +94,55 @@ struct space_info { uint64_t available; }; + +enum perms { + no_perms = 0, + owner_read = 0400, + owner_write = 0200, + owner_exe = 0100, + owner_all = 
owner_read | owner_write | owner_exe, + group_read = 040, + group_write = 020, + group_exe = 010, + group_all = group_read | group_write | group_exe, + others_read = 04, + others_write = 02, + others_exe = 01, + others_all = others_read | others_write | others_exe, + all_all = owner_all | group_all | others_all, + set_uid_on_exe = 04000, + set_gid_on_exe = 02000, + sticky_bit = 01000, + perms_mask = all_all | set_uid_on_exe | set_gid_on_exe | sticky_bit, + perms_not_known = 0xFFFF, + add_perms = 0x1000, + remove_perms = 0x2000, + symlink_perms = 0x4000 +}; + +// Helper functions so that you can use & and | to manipulate perms bits: +inline perms operator|(perms l , perms r) { + return static_cast<perms>( + static_cast<unsigned short>(l) | static_cast<unsigned short>(r)); +} +inline perms operator&(perms l , perms r) { + return static_cast<perms>( + static_cast<unsigned short>(l) & static_cast<unsigned short>(r)); +} +inline perms &operator|=(perms &l, perms r) { + l = l | r; + return l; +} +inline perms &operator&=(perms &l, perms r) { + l = l & r; + return l; +} +inline perms operator~(perms x) { + return static_cast<perms>(~static_cast<unsigned short>(x)); +} + + + /// file_status - Represents the result of a call to stat and friends. It has /// a platform specific member to store the result. 
class file_status @@ -113,12 +162,19 @@ class file_status friend bool equivalent(file_status A, file_status B); friend error_code status(const Twine &path, file_status &result); file_type Type; + perms Perms; public: - explicit file_status(file_type v=file_type::status_error) - : Type(v) {} + explicit file_status(file_type v=file_type::status_error, + perms prms=perms_not_known) + : Type(v), Perms(prms) {} + // getters file_type type() const { return Type; } + perms permissions() const { return Perms; } + + // setters void type(file_type v) { Type = v; } + void permissions(perms p) { Perms = p; } }; /// file_magic - An "enum class" enumeration of file types based on magic (the first @@ -395,6 +451,13 @@ error_code is_symlink(const Twine &path, bool &result); /// platform specific error_code. error_code status(const Twine &path, file_status &result); +/// @brief Modifies permission bits on a file +/// +/// @param path Input path. +/// @results errc::success if permissions have been changed, otherwise a +/// platform specific error_code. +error_code permissions(const Twine &path, perms prms); + /// @brief Is status available? /// /// @param path Input path. @@ -513,6 +576,33 @@ error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result); error_code GetMainExecutable(const char *argv0, void *MainAddr, SmallVectorImpl<char> &result); + +/// @brief Memory maps the contents of a file +/// +/// @param path Path to file to map. +/// @param file_offset Byte offset in file where mapping should begin. +/// @param size_t Byte length of range of the file to map. +/// @param map_writable If true, the file will be mapped in r/w such +/// that changes to the the mapped buffer will be flushed back +/// to the file. If false, the file will be mapped read-only +/// and the buffer will be read-only. +/// @param result Set to the start address of the mapped buffer. 
+/// @results errc::success if result has been successfully set, otherwise a +/// platform specific error_code. +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result); + + +/// @brief Memory unmaps the contents of a file +/// +/// @param base Pointer to the start of the buffer. +/// @param size Byte length of the range to unmmap. +/// @results errc::success if result has been successfully set, otherwise a +/// platform specific error_code. +error_code unmap_file_pages(void *base, size_t size); + + + /// @} /// @name Iterators /// @{ diff --git a/include/llvm/Support/IntegersSubset.h b/include/llvm/Support/IntegersSubset.h index 2ceeea5b66..bb9e76925e 100644 --- a/include/llvm/Support/IntegersSubset.h +++ b/include/llvm/Support/IntegersSubset.h @@ -25,7 +25,7 @@ #include "llvm/LLVMContext.h" namespace llvm { - + // The IntItem is a wrapper for APInt. // 1. It determines sign of integer, it allows to use // comparison operators >,<,>=,<=, and as result we got shorter and cleaner @@ -33,90 +33,96 @@ namespace llvm { // 2. It helps to implement PR1255 (case ranges) as a series of small patches. // 3. Currently we can interpret IntItem both as ConstantInt and as APInt. // It allows to provide SwitchInst methods that works with ConstantInt for - // non-updated passes. And it allows to use APInt interface for new methods. + // non-updated passes. And it allows to use APInt interface for new methods. // 4. IntItem can be easily replaced with APInt. - - // The set of macros that allows to propagate APInt operators to the IntItem. + + // The set of macros that allows to propagate APInt operators to the IntItem. 
#define INT_ITEM_DEFINE_COMPARISON(op,func) \ bool operator op (const APInt& RHS) const { \ - return ConstantIntVal->getValue().func(RHS); \ + return getAPIntValue().func(RHS); \ } - + #define INT_ITEM_DEFINE_UNARY_OP(op) \ IntItem operator op () const { \ - APInt res = op(ConstantIntVal->getValue()); \ + APInt res = op(getAPIntValue()); \ Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); \ return IntItem(cast<ConstantInt>(NewVal)); \ } - + #define INT_ITEM_DEFINE_BINARY_OP(op) \ IntItem operator op (const APInt& RHS) const { \ - APInt res = ConstantIntVal->getValue() op RHS; \ + APInt res = getAPIntValue() op RHS; \ Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); \ return IntItem(cast<ConstantInt>(NewVal)); \ } - + #define INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(op) \ IntItem& operator op (const APInt& RHS) {\ - APInt res = ConstantIntVal->getValue();\ + APInt res = getAPIntValue();\ res op RHS; \ Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); \ ConstantIntVal = cast<ConstantInt>(NewVal); \ return *this; \ - } - + } + #define INT_ITEM_DEFINE_PREINCDEC(op) \ IntItem& operator op () { \ - APInt res = ConstantIntVal->getValue(); \ + APInt res = getAPIntValue(); \ op(res); \ Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); \ ConstantIntVal = cast<ConstantInt>(NewVal); \ return *this; \ - } + } #define INT_ITEM_DEFINE_POSTINCDEC(op) \ IntItem& operator op (int) { \ - APInt res = ConstantIntVal->getValue();\ + APInt res = getAPIntValue();\ op(res); \ Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); \ OldConstantIntVal = ConstantIntVal; \ ConstantIntVal = cast<ConstantInt>(NewVal); \ return IntItem(OldConstantIntVal); \ - } - + } + #define INT_ITEM_DEFINE_OP_STANDARD_INT(RetTy, op, IntTy) \ RetTy operator op (IntTy RHS) const { \ - return (*this) op APInt(ConstantIntVal->getValue().getBitWidth(), RHS); \ - } + return (*this) op 
APInt(getAPIntValue().getBitWidth(), RHS); \ + } class IntItem { ConstantInt *ConstantIntVal; - IntItem(const ConstantInt *V) : ConstantIntVal(const_cast<ConstantInt*>(V)) {} + const APInt* APIntVal; + IntItem(const ConstantInt *V) : + ConstantIntVal(const_cast<ConstantInt*>(V)), + APIntVal(&ConstantIntVal->getValue()){} + const APInt& getAPIntValue() const { + return *APIntVal; + } public: - + IntItem() {} - + operator const APInt&() const { - return (const APInt&)ConstantIntVal->getValue(); - } - + return getAPIntValue(); + } + // Propagate APInt operators. // Note, that // /,/=,>>,>>= are not implemented in APInt. // <<= is implemented for unsigned RHS, but not implemented for APInt RHS. - + INT_ITEM_DEFINE_COMPARISON(<, ult) INT_ITEM_DEFINE_COMPARISON(>, ugt) INT_ITEM_DEFINE_COMPARISON(<=, ule) INT_ITEM_DEFINE_COMPARISON(>=, uge) - + INT_ITEM_DEFINE_COMPARISON(==, eq) INT_ITEM_DEFINE_OP_STANDARD_INT(bool,==,uint64_t) - + INT_ITEM_DEFINE_COMPARISON(!=, ne) INT_ITEM_DEFINE_OP_STANDARD_INT(bool,!=,uint64_t) - + INT_ITEM_DEFINE_BINARY_OP(*) INT_ITEM_DEFINE_BINARY_OP(+) INT_ITEM_DEFINE_OP_STANDARD_INT(IntItem,+,uint64_t) @@ -127,32 +133,32 @@ public: INT_ITEM_DEFINE_BINARY_OP(&) INT_ITEM_DEFINE_BINARY_OP(^) INT_ITEM_DEFINE_BINARY_OP(|) - + INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(*=) INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(+=) INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(-=) INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(&=) INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(^=) INT_ITEM_DEFINE_ASSIGNMENT_BY_OP(|=) - + // Special case for <<= IntItem& operator <<= (unsigned RHS) { - APInt res = ConstantIntVal->getValue(); + APInt res = getAPIntValue(); res <<= RHS; Constant *NewVal = ConstantInt::get(ConstantIntVal->getContext(), res); ConstantIntVal = cast<ConstantInt>(NewVal); - return *this; + return *this; } - + INT_ITEM_DEFINE_UNARY_OP(-) INT_ITEM_DEFINE_UNARY_OP(~) - + INT_ITEM_DEFINE_PREINCDEC(++) INT_ITEM_DEFINE_PREINCDEC(--) - + // The set of workarounds, since currently we use ConstantInt implemented // integer. 
- + static IntItem fromConstantInt(const ConstantInt *V) { return IntItem(V); } @@ -179,22 +185,22 @@ protected: bool IsSingleNumber : 1; public: - typedef IntRange<IntType> self; + typedef IntRange<IntType> self; typedef std::pair<self, self> SubRes; - + IntRange() : IsEmpty(true) {} IntRange(const self &RHS) : Low(RHS.Low), High(RHS.High), IsEmpty(RHS.IsEmpty), IsSingleNumber(RHS.IsSingleNumber) {} IntRange(const IntType &C) : Low(C), High(C), IsEmpty(false), IsSingleNumber(true) {} - + IntRange(const IntType &L, const IntType &H) : Low(L), High(H), IsEmpty(false), IsSingleNumber(Low == High) {} - + bool isEmpty() const { return IsEmpty; } bool isSingleNumber() const { return IsSingleNumber; } - + const IntType& getLow() const { assert(!IsEmpty && "Range is empty."); return Low; @@ -203,7 +209,7 @@ public: assert(!IsEmpty && "Range is empty."); return High; } - + bool operator<(const self &RHS) const { assert(!IsEmpty && "Left range is empty."); assert(!RHS.IsEmpty && "Right range is empty."); @@ -220,37 +226,37 @@ public: bool operator==(const self &RHS) const { assert(!IsEmpty && "Left range is empty."); assert(!RHS.IsEmpty && "Right range is empty."); - return Low == RHS.Low && High == RHS.High; + return Low == RHS.Low && High == RHS.High; } - + bool operator!=(const self &RHS) const { - return !operator ==(RHS); + return !operator ==(RHS); } - + static bool LessBySize(const self &LHS, const self &RHS) { return (LHS.High - LHS.Low) < (RHS.High - RHS.Low); } - + bool isInRange(const IntType &IntVal) const { assert(!IsEmpty && "Range is empty."); - return IntVal >= Low && IntVal <= High; - } - + return IntVal >= Low && IntVal <= High; + } + SubRes sub(const self &RHS) const { SubRes Res; - + // RHS is either more global and includes this range or // if it doesn't intersected with this range. if (!isInRange(RHS.Low) && !isInRange(RHS.High)) { - + // If RHS more global (it is enough to check // only one border in this case. 
if (RHS.isInRange(Low)) - return std::make_pair(self(Low, High), self()); - + return std::make_pair(self(Low, High), self()); + return Res; } - + if (Low < RHS.Low) { Res.first.Low = Low; IntType NewHigh = RHS.Low; @@ -263,9 +269,9 @@ public: Res.second.Low = NewLow; Res.second.High = High; } - return Res; + return Res; } - }; + }; //===----------------------------------------------------------------------===// /// IntegersSubsetGeneric - class that implements the subset of integers. It @@ -278,42 +284,56 @@ public: // In short, for more compact memory consumption we can store flat // numbers collection, and define range as pair of indices. // In that case we can safe some memory on 32 bit machines. - typedef std::list<IntTy> FlatCollectionTy; + typedef std::vector<IntTy> FlatCollectionTy; typedef std::pair<IntTy*, IntTy*> RangeLinkTy; - typedef SmallVector<RangeLinkTy, 64> RangeLinksTy; + typedef std::vector<RangeLinkTy> RangeLinksTy; typedef typename RangeLinksTy::const_iterator RangeLinksConstIt; - + typedef IntegersSubsetGeneric<IntTy> self; - + protected: - + FlatCollectionTy FlatCollection; RangeLinksTy RangeLinks; - + + bool IsSingleNumber; + bool IsSingleNumbersOnly; + public: - + template<class RangesCollectionTy> explicit IntegersSubsetGeneric(const RangesCollectionTy& Links) { assert(Links.size() && "Empty ranges are not allowed."); + + // In case of big set of single numbers consumes additional RAM space, + // but allows to avoid additional reallocation. 
+ FlatCollection.reserve(Links.size() * 2); + RangeLinks.reserve(Links.size()); + IsSingleNumbersOnly = true; for (typename RangesCollectionTy::const_iterator i = Links.begin(), e = Links.end(); i != e; ++i) { RangeLinkTy RangeLink; FlatCollection.push_back(i->getLow()); RangeLink.first = &FlatCollection.back(); - if (i->getLow() != i->getHigh()) + if (i->getLow() != i->getHigh()) { FlatCollection.push_back(i->getHigh()); + IsSingleNumbersOnly = false; + } RangeLink.second = &FlatCollection.back(); RangeLinks.push_back(RangeLink); } + IsSingleNumber = IsSingleNumbersOnly && RangeLinks.size() == 1; } - + IntegersSubsetGeneric(const self& RHS) { *this = RHS; } - + self& operator=(const self& RHS) { FlatCollection.clear(); RangeLinks.clear(); + FlatCollection.reserve(RHS.RangeLinks.size() * 2); + RangeLinks.reserve(RHS.RangeLinks.size()); for (RangeLinksConstIt i = RHS.RangeLinks.begin(), e = RHS.RangeLinks.end(); i != e; ++i) { RangeLinkTy RangeLink; @@ -324,26 +344,35 @@ public: RangeLink.second = &FlatCollection.back(); RangeLinks.push_back(RangeLink); } + IsSingleNumber = RHS.IsSingleNumber; + IsSingleNumbersOnly = RHS.IsSingleNumbersOnly; return *this; } - + typedef IntRange<IntTy> Range; - + /// Checks is the given constant satisfies this case. Returns /// true if it equals to one of contained values or belongs to the one of /// contained ranges. 
bool isSatisfies(const IntTy &CheckingVal) const { + if (IsSingleNumber) + return FlatCollection.front() == CheckingVal; + if (IsSingleNumbersOnly) + return std::find(FlatCollection.begin(), + FlatCollection.end(), + CheckingVal) != FlatCollection.end(); + for (unsigned i = 0, e = getNumItems(); i < e; ++i) { if (RangeLinks[i].first == RangeLinks[i].second) { if (*RangeLinks[i].first == CheckingVal) return true; } else if (*RangeLinks[i].first <= CheckingVal && - *RangeLinks[i].second >= CheckingVal) + *RangeLinks[i].second >= CheckingVal) return true; } - return false; + return false; } - + /// Returns set's item with given index. Range getItem(unsigned idx) const { const RangeLinkTy &Link = RangeLinks[idx]; @@ -351,25 +380,29 @@ public: return Range(*Link.first, *Link.second); else return Range(*Link.first); - } - + } + /// Return number of items (ranges) stored in set. unsigned getNumItems() const { return RangeLinks.size(); } - + /// Returns true if whole subset contains single element. bool isSingleNumber() const { - return RangeLinks.size() == 1 && - RangeLinks[0].first == RangeLinks[0].second; + return IsSingleNumber; + } + + /// Returns true if whole subset contains only single numbers, no ranges. + bool isSingleNumbersOnly() const { + return IsSingleNumbersOnly; } /// Does the same like getItem(idx).isSingleNumber(), but - /// works faster, since we avoid creation of temporary range object. + /// works faster, since we avoid creation of temporary range object. bool isSingleNumber(unsigned idx) const { return RangeLinks[idx].first == RangeLinks[idx].second; } - + /// Returns set the size, that equals number of all values + sizes of all /// ranges. /// Ranges set is considered as flat numbers collection. @@ -383,18 +416,18 @@ public: APInt S = High - Low + 1; sz += S; } - return sz.getZExtValue(); + return sz.getZExtValue(); } - + /// Allows to access single value even if it belongs to some range. /// Ranges set is considered as flat numbers collection. 
- /// [<1>, <4,8>] is considered as [1,4,5,6,7,8] + /// [<1>, <4,8>] is considered as [1,4,5,6,7,8] /// For range [<1>, <4,8>] getSingleValue(3) returns 6. APInt getSingleValue(unsigned idx) const { APInt sz(((const APInt&)getItem(0).getLow()).getBitWidth(), 0); for (unsigned i = 0, e = getNumItems(); i != e; ++i) { const APInt &Low = getItem(i).getLow(); - const APInt &High = getItem(i).getHigh(); + const APInt &High = getItem(i).getHigh(); APInt S = High - Low + 1; APInt oldSz = sz; sz += S; @@ -407,26 +440,34 @@ public: } } assert(0 && "Index exceeds high border."); - return sz; + return sz; + } + + /// Does the same as getSingleValue, but works only if subset contains + /// single numbers only. + const IntTy& getSingleNumber(unsigned idx) const { + assert(IsSingleNumbersOnly && "This method works properly if subset " + "contains single numbers only."); + return FlatCollection[idx]; } -}; +}; //===----------------------------------------------------------------------===// /// IntegersSubset - currently is extension of IntegersSubsetGeneric /// that also supports conversion to/from Constant* object. 
class IntegersSubset : public IntegersSubsetGeneric<IntItem> { - + typedef IntegersSubsetGeneric<IntItem> ParentTy; - + Constant *Holder; - + static unsigned getNumItemsFromConstant(Constant *C) { return cast<ArrayType>(C->getType())->getNumElements(); } - + static Range getItemFromConstant(Constant *C, unsigned idx) { const Constant *CV = C->getAggregateElement(idx); - + unsigned NumEls = cast<VectorType>(CV->getType())->getNumElements(); switch (NumEls) { case 1: @@ -442,9 +483,9 @@ class IntegersSubset : public IntegersSubsetGeneric<IntItem> { default: assert(0 && "Only pairs and single numbers are allowed here."); return Range(); - } - } - + } + } + std::vector<Range> rangesFromConstant(Constant *C) { unsigned NumItems = getNumItemsFromConstant(C); std::vector<Range> r; @@ -453,12 +494,16 @@ class IntegersSubset : public IntegersSubsetGeneric<IntItem> { r.push_back(getItemFromConstant(C, i)); return r; } - + public: - - IntegersSubset(Constant *C) : ParentTy(rangesFromConstant(C)), - Holder(C) {} - + + explicit IntegersSubset(Constant *C) : ParentTy(rangesFromConstant(C)), + Holder(C) {} + + IntegersSubset(const IntegersSubset& RHS) : + ParentTy(*(const ParentTy *)&RHS), // FIXME: tweak for msvc. 
+ Holder(RHS.Holder) {} + template<class RangesCollectionTy> explicit IntegersSubset(const RangesCollectionTy& Src) : ParentTy(Src) { std::vector<Constant*> Elts; @@ -478,18 +523,18 @@ public: r.push_back(R.getLow().toConstantInt()); } Constant *CV = ConstantVector::get(r); - Elts.push_back(CV); + Elts.push_back(CV); } ArrayType *ArrTy = ArrayType::get(Elts.front()->getType(), (uint64_t)Elts.size()); - Holder = ConstantArray::get(ArrTy, Elts); + Holder = ConstantArray::get(ArrTy, Elts); } - + operator Constant*() { return Holder; } operator const Constant*() const { return Holder; } Constant *operator->() { return Holder; } const Constant *operator->() const { return Holder; } -}; +}; } diff --git a/include/llvm/Support/IntegersSubsetMapping.h b/include/llvm/Support/IntegersSubsetMapping.h index c79b3c1684..87d0755c51 100644 --- a/include/llvm/Support/IntegersSubsetMapping.h +++ b/include/llvm/Support/IntegersSubsetMapping.h @@ -31,6 +31,10 @@ template <class SuccessorClass, class IntegersSubsetTy = IntegersSubset, class IntTy = IntItem> class IntegersSubsetMapping { + // FIXME: To much similar iterators typedefs, similar names. + // - Rename RangeIterator to the cluster iterator. + // - Remove unused "add" methods. + // - Class contents needs cleaning. 
public: typedef IntRange<IntTy> RangeTy; @@ -47,15 +51,17 @@ public: typedef std::pair<RangeEx, SuccessorClass*> Cluster; + typedef std::list<RangeTy> RangesCollection; + typedef typename RangesCollection::iterator RangesCollectionIt; + typedef typename RangesCollection::const_iterator RangesCollectionConstIt; + typedef IntegersSubsetMapping<SuccessorClass, IntegersSubsetTy, IntTy> self; + protected: typedef std::list<Cluster> CaseItems; typedef typename CaseItems::iterator CaseItemIt; typedef typename CaseItems::const_iterator CaseItemConstIt; - typedef std::list<RangeTy> RangesCollection; - typedef typename RangesCollection::iterator RangesCollectionIt; - // TODO: Change unclean CRS prefixes to SubsetMap for example. typedef std::map<SuccessorClass*, RangesCollection > CRSMap; typedef typename CRSMap::iterator CRSMapIt; @@ -66,12 +72,6 @@ protected: } }; - struct ClusterLefterThan { - bool operator()(const Cluster &C, const RangeTy &R) { - return C.first.getHigh() < R.getLow(); - } - }; - CaseItems Items; bool Sorted; @@ -103,39 +103,148 @@ protected: } } - void exclude(CaseItemIt &beginIt, RangeTy &R) { + enum DiffProcessState { + L_OPENED, + INTERSECT_OPENED, + R_OPENED, + ALL_IS_CLOSED + }; + + class DiffStateMachine { - std::list<CaseItemIt> ToBeErased; - - CaseItemIt endIt = Items.end(); - CaseItemIt It = - std::lower_bound(beginIt, Items.end(), R, ClusterLefterThan()); + DiffProcessState State; + IntTy OpenPt; + SuccessorClass *CurrentLSuccessor; + SuccessorClass *CurrentRSuccessor; - if (It == endIt) - return; - - if (It->first.getLow() < R.getLow()) - Items.insert(It, std::make_pair( - RangeTy(It->first.getLow(), R.getLow()-1), - It->second)); - - do - ToBeErased.push_back(It++); - while (It != endIt && It->first.getLow() <= R.getHigh()); + self *LeftMapping; + self *IntersectionMapping; + self *RightMapping; - beginIt = It; + public: - CaseItemIt &LastRemoved = *(--ToBeErased.end()); - if (LastRemoved->first.getHigh() > R.getHigh()) - beginIt = 
Items.insert(LastRemoved, std::make_pair( - RangeTy(R.getHigh() + 1, LastRemoved->first.getHigh()), - LastRemoved->second - )); + typedef + IntegersSubsetMapping<SuccessorClass, IntegersSubsetTy, IntTy> MappingTy; - for (typename std::list<CaseItemIt>::iterator i = ToBeErased.begin(), - e = ToBeErased.end(); i != e; ++i) - Items.erase(*i); - } + DiffStateMachine(MappingTy *L, + MappingTy *Intersection, + MappingTy *R) : + State(ALL_IS_CLOSED), + LeftMapping(L), + IntersectionMapping(Intersection), + RightMapping(R) + {} + + void onLOpen(const IntTy &Pt, SuccessorClass *S) { + switch (State) { + case R_OPENED: + if (Pt > OpenPt/*Don't add empty ranges.*/ && RightMapping) + RightMapping->add(OpenPt, Pt-1, CurrentRSuccessor); + State = INTERSECT_OPENED; + break; + case ALL_IS_CLOSED: + State = L_OPENED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + CurrentLSuccessor = S; + OpenPt = Pt; + } + + void onLClose(const IntTy &Pt) { + switch (State) { + case L_OPENED: + assert(Pt >= OpenPt && + "Subset is not sorted or contains overlapped ranges"); + if (LeftMapping) + LeftMapping->add(OpenPt, Pt, CurrentLSuccessor); + State = ALL_IS_CLOSED; + break; + case INTERSECT_OPENED: + if (IntersectionMapping) + IntersectionMapping->add(OpenPt, Pt, CurrentLSuccessor); + OpenPt = Pt + 1; + State = R_OPENED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + } + + void onROpen(const IntTy &Pt, SuccessorClass *S) { + switch (State) { + case L_OPENED: + if (Pt > OpenPt && LeftMapping) + LeftMapping->add(OpenPt, Pt-1, CurrentLSuccessor); + State = INTERSECT_OPENED; + break; + case ALL_IS_CLOSED: + State = R_OPENED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + CurrentRSuccessor = S; + OpenPt = Pt; + } + + void onRClose(const IntTy &Pt) { + switch (State) { + case R_OPENED: + assert(Pt >= OpenPt && + "Subset is not sorted or contains overlapped ranges"); + if (RightMapping) + RightMapping->add(OpenPt, Pt, 
CurrentRSuccessor); + State = ALL_IS_CLOSED; + break; + case INTERSECT_OPENED: + if (IntersectionMapping) + IntersectionMapping->add(OpenPt, Pt, CurrentLSuccessor); + OpenPt = Pt + 1; + State = L_OPENED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + } + + void onRLOpen(const IntTy &Pt, + SuccessorClass *LS, + SuccessorClass *RS) { + switch (State) { + case ALL_IS_CLOSED: + State = INTERSECT_OPENED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + CurrentLSuccessor = LS; + CurrentRSuccessor = RS; + OpenPt = Pt; + } + + void onRLClose(const IntTy &Pt) { + switch (State) { + case INTERSECT_OPENED: + if (IntersectionMapping) + IntersectionMapping->add(OpenPt, Pt, CurrentLSuccessor); + State = ALL_IS_CLOSED; + break; + default: + assert(0 && "Got unexpected point."); + break; + } + } + + bool isLOpened() { return State == L_OPENED; } + bool isROpened() { return State == R_OPENED; } + }; public: @@ -227,18 +336,107 @@ public: } } + void add(self& RHS) { + Items.insert(Items.end(), RHS.Items.begin(), RHS.Items.end()); + } + + void add(const RangesCollection& RHS, SuccessorClass *S = 0) { + for (RangesCollectionConstIt i = RHS.begin(), e = RHS.end(); i != e; ++i) + add(*i, S); + } + /// Removes items from set. void removeItem(RangeIterator i) { Items.erase(i); } - // Excludes RHS subset from current mapping. RHS should consists of non - // overlapped ranges only and sorted from left to the right. - // method will have unpredictional behaviour in another case. - void exclude(IntegersSubsetTy &RHS) { - CaseItemIt startIt = begin(); - for (unsigned i = 0, e = RHS.getNumItems(); - i != e && startIt != end(); ++i) { - RangeTy R = RHS.getItem(i); - exclude(startIt, R); + /// Calculates the difference between this mapping and RHS. + /// THIS without RHS is placed into LExclude, + /// RHS without THIS is placed into RExclude, + /// THIS intersect RHS is placed into Intersection. 
+ void diff(self *LExclude, self *Intersection, self *RExclude, + const self& RHS) { + + DiffStateMachine Machine(LExclude, Intersection, RExclude); + + CaseItemConstIt L = Items.begin(), R = RHS.Items.begin(); + while (L != Items.end() && R != RHS.Items.end()) { + const Cluster &LCluster = *L; + const RangeEx &LRange = LCluster.first; + const Cluster &RCluster = *R; + const RangeEx &RRange = RCluster.first; + + if (LRange.getHigh() < RRange.getLow()) { + Machine.onLOpen(LRange.getLow(), LCluster.second); + Machine.onLClose(LRange.getHigh()); + ++L; + continue; + } + + if (LRange.getLow() > RRange.getHigh()) { + Machine.onROpen(RRange.getLow(), RCluster.second); + Machine.onRClose(RRange.getHigh()); + ++R; + continue; + } + + if (LRange.getLow() < RRange.getLow()) { + // May be opened in previous iteration. + if (!Machine.isLOpened()) + Machine.onLOpen(LRange.getLow(), LCluster.second); + Machine.onROpen(RRange.getLow(), RCluster.second); + } + else if (RRange.getLow() < LRange.getLow()) { + if (!Machine.isROpened()) + Machine.onROpen(RRange.getLow(), RCluster.second); + Machine.onLOpen(LRange.getLow(), LCluster.second); + } + else + Machine.onRLOpen(LRange.getLow(), LCluster.second, RCluster.second); + + if (LRange.getHigh() < RRange.getHigh()) { + Machine.onLClose(LRange.getHigh()); + ++L; + while(L != Items.end() && L->first.getHigh() < RRange.getHigh()) { + Machine.onLOpen(L->first.getLow(), L->second); + Machine.onLClose(L->first.getHigh()); + ++L; + } + } + else if (RRange.getHigh() < LRange.getHigh()) { + Machine.onRClose(RRange.getHigh()); + ++R; + while(R != RHS.Items.end() && R->first.getHigh() < LRange.getHigh()) { + Machine.onROpen(R->first.getLow(), R->second); + Machine.onRClose(R->first.getHigh()); + ++R; + } + } + else { + Machine.onRLClose(LRange.getHigh()); + ++L; + ++R; + } + } + + if (L != Items.end()) { + if (Machine.isLOpened()) { + Machine.onLClose(L->first.getHigh()); + ++L; + } + if (LExclude) + while (L != Items.end()) { + 
LExclude->add(L->first, L->second); + ++L; + } + } else if (R != RHS.Items.end()) { + if (Machine.isROpened()) { + Machine.onRClose(R->first.getHigh()); + ++R; + } + if (RExclude) + while (R != RHS.Items.end()) { + RExclude->add(R->first, R->second); + ++R; + } } } @@ -270,6 +468,11 @@ public: // 2. After first item will added Sorted flag will cleared. } + // Returns number of clusters + unsigned size() const { + return Items.size(); + } + RangeIterator begin() { return Items.begin(); } RangeIterator end() { return Items.end(); } }; diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index d085c94f2a..4005161320 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -414,14 +414,14 @@ int IsInf(double d); /// MinAlign - A and B are either alignments or offsets. Return the minimum /// alignment that may be assumed after adding the two together. -static inline uint64_t MinAlign(uint64_t A, uint64_t B) { +inline uint64_t MinAlign(uint64_t A, uint64_t B) { // The largest power of 2 that divides both A and B. return (A | B) & -(A | B); } /// NextPowerOf2 - Returns the next power of two (in 64-bits) /// that is strictly greater than A. Returns zero on overflow. -static inline uint64_t NextPowerOf2(uint64_t A) { +inline uint64_t NextPowerOf2(uint64_t A) { A |= (A >> 1); A |= (A >> 2); A |= (A >> 4); diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 621a9c83ba..0e87a53a6c 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -111,13 +111,20 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> { let SubRegs = subregs; } +// DAGOperand - An empty base class that unifies RegisterClass's and other forms +// of Operand's that are legal as type qualifiers in DAG patterns. This should +// only ever be used for defining multiclasses that are polymorphic over both +// RegisterClass's and other Operand's. 
+class DAGOperand { } + // RegisterClass - Now that all of the registers are defined, and aliases // between registers are defined, specify which registers belong to which // register classes. This also defines the default allocation order of // registers by register allocators. // class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, - dag regList, RegAltNameIndex idx = NoRegAltName> { + dag regList, RegAltNameIndex idx = NoRegAltName> + : DAGOperand { string Namespace = namespace; // RegType - Specify the list ValueType of the registers in this register @@ -523,6 +530,11 @@ class AsmOperandClass { /// to immediates or registers and are very instruction specific (as flags to /// set in a processor register, coprocessor number, ...). string ParserMethod = ?; + + // The diagnostic type to present when referencing this operand in a + // match failure error message. By default, use a generic "invalid operand" + // diagnostic. The target AsmParser maps these codes to text. + string DiagnosticType = ""; } def ImmAsmOperand : AsmOperandClass { @@ -532,7 +544,7 @@ def ImmAsmOperand : AsmOperandClass { /// Operand Types - These provide the built-in operand types that may be used /// by a target. Targets can optionally provide their own operand types as /// needed, though this should not be needed for RISC targets. -class Operand<ValueType ty> { +class Operand<ValueType ty> : DAGOperand { ValueType Type = ty; string PrintMethod = "printOperand"; string EncoderMethod = ""; diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 4e73139881..f096946b38 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -623,19 +623,22 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, const ScheduleDAG *DAG) const = 0; - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. 
Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, int &Mask, int &Value) const { + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const { return false; } - /// OptimizeCompareInstr - See if the comparison instruction can be converted + /// optimizeCompareInstr - See if the comparison instruction can be converted /// into something more efficient. E.g., on ARM most instructions can set the /// flags register, obviating the need for a separate CMP. - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, int Mask, int Value, + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, const MachineRegisterInfo *MRI) const { return false; } @@ -648,7 +651,9 @@ public: } /// getNumMicroOps - Return the number of u-operations the given machine - /// instruction will be decoded to on the target cpu. + /// instruction will be decoded to on the target cpu. The itinerary's + /// IssueWidth is the number of microops that can be dispatched each + /// cycle. An instruction with zero microops takes no dispatch resources. virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const = 0; diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 896ba39096..946e13c697 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -689,6 +689,12 @@ public: return UseUnderscoreLongJmp; } + /// supportJumpTables - return whether the target can generate code for + /// jump tables. 
+ bool supportJumpTables() const { + return SupportJumpTables; + } + /// getStackPointerRegisterToSaveRestore - If a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -1003,6 +1009,12 @@ protected: UseUnderscoreLongJmp = Val; } + /// setSupportJumpTables - Indicate whether the target can generate code for + /// jump tables. + void setSupportJumpTables(bool Val) { + SupportJumpTables = Val; + } + /// setStackPointerRegisterToSaveRestore - If set to a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -1776,6 +1788,10 @@ private: /// llvm.longjmp. Defaults to false. bool UseUnderscoreLongJmp; + /// SupportJumpTables - Whether the target can generate code for jumptables. + /// If it's not true, then each jumptable must be lowered into if-then-else's. + bool SupportJumpTables; + /// BooleanContents - Information about the contents of the high-bits in /// boolean values held in a type wider than i1. See getBooleanContents. 
BooleanContent BooleanContents; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 1a0560478a..e4bf32bd86 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGET_TARGETMACHINE_H #define LLVM_TARGET_TARGETMACHINE_H +#include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/StringRef.h" @@ -247,7 +248,9 @@ public: virtual bool addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &, CodeGenFileType, - bool /*DisableVerify*/ = true) { + bool /*DisableVerify*/ = true, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0) { return true; } @@ -297,7 +300,9 @@ public: virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify = true); + bool DisableVerify = true, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0); /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 92e627cea8..a6fb243f50 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -36,20 +36,28 @@ namespace llvm { extern bool TLSUseCall; // @LOCALMOD-END + namespace FPOpFusion { + enum FPOpFusionMode { + Fast, // Enable fusion of FP ops wherever it's profitable. + Standard, // Only allow fusion of 'blessed' ops (currently just fmuladd). + Strict // Never fuse FP-ops. 
+ }; + } + class TargetOptions { public: TargetOptions() : PrintMachineCode(false), NoFramePointerElim(false), NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false), - NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false), + UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false), UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false), JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false), DisableTailCalls(false), - StackAlignmentOverride(0), RealignStack(true), - DisableJumpTables(false), EnableFastISel(false), + StackAlignmentOverride(0), RealignStack(true), EnableFastISel(false), PositionIndependentExecutable(false), EnableSegmentedStacks(false), - TrapFuncName(""), FloatABIType(FloatABI::Default) + UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default), + AllowFPOpFusion(FPOpFusion::Standard) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs @@ -80,14 +88,6 @@ namespace llvm { unsigned LessPreciseFPMADOption : 1; bool LessPreciseFPMAD() const; - /// NoExcessFPPrecision - This flag is enabled when the - /// -disable-excess-fp-precision flag is specified on the command line. - /// When this flag is off (the default), the code generator is allowed to - /// produce results that are "more precise" than IEEE allows. This includes - /// use of FMA-like operations and use of the X86 FP registers without - /// rounding all over the place. - unsigned NoExcessFPPrecision : 1; - /// UnsafeFPMath - This flag is enabled when the /// -enable-unsafe-fp-math flag is specified on the command line. When /// this flag is off (the default), the code generator is not allowed to @@ -161,10 +161,6 @@ namespace llvm { /// automatically realigned, if needed. unsigned RealignStack : 1; - /// DisableJumpTables - This flag indicates jump tables should not be - /// generated. 
- unsigned DisableJumpTables : 1; - /// EnableFastISel - This flag enables fast-path instruction selection /// which trades away generated code quality in favor of reducing /// compile time. @@ -178,6 +174,10 @@ namespace llvm { unsigned EnableSegmentedStacks : 1; + /// UseInitArray - Use .init_array instead of .ctors for static + /// constructors. + unsigned UseInitArray : 1; + /// getTrapFunctionName - If this returns a non-empty string, this means /// isel should lower Intrinsic::trap to a call to the specified function /// name instead of an ISD::TRAP node. @@ -191,6 +191,25 @@ namespace llvm { /// Such a combination is unfortunately popular (e.g. arm-apple-darwin). /// Hard presumes that the normal FP ABI is used. FloatABI::ABIType FloatABIType; + + /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option. + /// This controls the creation of fused FP ops that store intermediate + /// results in higher precision than IEEE allows (E.g. FMAs). + /// + /// Fast mode - allows formation of fused FP ops whenever they're + /// profitable. + /// Standard mode - allow fusion only for 'blessed' FP ops. At present the + /// only blessed op is the fmuladd intrinsic. In the future more blessed ops + /// may be added. + /// Strict mode - allow fusion only if/when it can be proven that the excess + /// precision won't effect the result. + /// + /// Note: This option only controls formation of fused ops by the optimizers. + /// Fused operations that are explicitly specified (e.g. FMA via the + /// llvm.fma.* intrinsic) will always be honored, regardless of the value of + /// this option. 
+ FPOpFusion::FPOpFusionMode AllowFPOpFusion; + }; } // End llvm namespace diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 31e8b17f25..caa5a84c83 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -73,20 +73,20 @@ class InstrStage<int cycles, list<FuncUnit> units, // across all chip sets. Thus a new chip set can be added without modifying // instruction information. // -// NumMicroOps represents the number of micro-operations that each instruction -// in the class are decoded to. If the number is zero, then it means the -// instruction can decode into variable number of micro-ops and it must be -// determined dynamically. -// -class InstrItinClass<int ops = 1> { - int NumMicroOps = ops; -} +class InstrItinClass; def NoItinerary : InstrItinClass; //===----------------------------------------------------------------------===// // Instruction itinerary data - These values provide a runtime map of an // instruction itinerary class (name) to its itinerary data. // +// NumMicroOps represents the number of micro-operations that each instruction +// in the class are decoded to. If the number is zero, then it means the +// instruction can decode into variable number of micro-ops and it must be +// determined dynamically. This directly relates to the itineraries +// global IssueWidth property, which constrains the number of microops +// that can issue per cycle. +// // OperandCycles are optional "cycle counts". They specify the cycle after // instruction issue the values which correspond to specific operand indices // are defined or read. Bypasses are optional "pipeline forwarding pathes", if @@ -106,8 +106,9 @@ def NoItinerary : InstrItinClass; // is reduced by 1. 
class InstrItinData<InstrItinClass Class, list<InstrStage> stages, list<int> operandcycles = [], - list<Bypass> bypasses = []> { + list<Bypass> bypasses = [], int uops = 1> { InstrItinClass TheClass = Class; + int NumMicroOps = uops; list<InstrStage> Stages = stages; list<int> OperandCycles = operandcycles; list<Bypass> Bypasses = bypasses; @@ -133,7 +134,8 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp, } // NoItineraries - A marker that can be used by processors without schedule -// info. +// info. Subtargets using NoItineraries can bypass the scheduler's +// expensive HazardRecognizer because no reservation table is needed. def NoItineraries : ProcessorItineraries<[], [], []>; // Processor itineraries with non-unit issue width. This allows issue diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 06130d1529..67f2e377f7 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -74,7 +74,10 @@ FunctionPass *createAggressiveDCEPass(); // if possible. // FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1, - bool UseDomTree = true); + bool UseDomTree = true, + signed StructMemberThreshold = -1, + signed ArrayElementThreshold = -1, + signed ScalarLoadThreshold = -1); //===----------------------------------------------------------------------===// // diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 2f9dc54541..8a939cc75e 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -202,10 +202,6 @@ void SplitLandingPadPredecessors(BasicBlock *OrigBB,ArrayRef<BasicBlock*> Preds, ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred); -/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a -/// given basic block. 
-DebugLoc GetFirstDebugLocInBasicBlock(const BasicBlock *BB); - } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index 17cd58eb01..6229cbc3e5 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -15,7 +15,7 @@ #ifndef TRANSFORMS_UTILS_BUILDLIBCALLS_H #define TRANSFORMS_UTILS_BUILDLIBCALLS_H -#include "llvm/Support/IRBuilder.h" +#include "llvm/IRBuilder.h" namespace llvm { class Value; diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 936d58efc3..84c0c5862e 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -15,10 +15,10 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/IRBuilder.h" +#include "llvm/Operator.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Target/TargetData.h" -#include "llvm/Operator.h" namespace llvm { diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h index 652916c26c..1e49a9c01e 100644 --- a/include/llvm/Transforms/Vectorize.h +++ b/include/llvm/Transforms/Vectorize.h @@ -28,6 +28,9 @@ struct VectorizeConfig { /// @brief The size of the native vector registers. unsigned VectorBits; + /// @brief Vectorize boolean values. + bool VectorizeBools; + /// @brief Vectorize integer values. bool VectorizeInts; @@ -49,6 +52,9 @@ struct VectorizeConfig { /// @brief Vectorize select instructions. bool VectorizeSelect; + /// @brief Vectorize comparison instructions. + bool VectorizeCmp; + /// @brief Vectorize getelementptr instructions. bool VectorizeGEP; @@ -80,6 +86,9 @@ struct VectorizeConfig { /// @brief The maximum number of pairing iterations. unsigned MaxIter; + /// @brief Don't try to form odd-length vectors. 
+ bool Pow2LenOnly; + /// @brief Don't boost the chain-depth contribution of loads and stores. bool NoMemOpBoost; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index f80e2fba80..92e89068e4 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -501,7 +501,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { } // First, look up the PointerRec for this pointer. - PointerMapType::iterator I = PointerMap.find(PtrVal); + PointerMapType::iterator I = PointerMap.find_as(PtrVal); if (I == PointerMap.end()) return; // Noop // If we found one, remove the pointer from the alias set it is in. @@ -527,7 +527,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { AA.copyValue(From, To); // First, look up the PointerRec for this pointer. - PointerMapType::iterator I = PointerMap.find(From); + PointerMapType::iterator I = PointerMap.find_as(From); if (I == PointerMap.end()) return; // Noop assert(I->second->hasAliasSet() && "Dead entry?"); @@ -536,7 +536,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { if (Entry.hasAliasSet()) return; // Already in the tracker! // Add it to the alias set it aliases... - I = PointerMap.find(From); + I = PointerMap.find_as(From); AliasSet *AS = I->second->getAliasSet(*this); AS->addPointer(*this, Entry, I->second->getSize(), I->second->getTBAAInfo(), diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 20ecfd26a9..1d028c27b8 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -86,47 +86,10 @@ static bool isEscapeSource(const Value *V) { /// UnknownSize if unknown. 
static uint64_t getObjectSize(const Value *V, const TargetData &TD, bool RoundToAlign = false) { - Type *AccessTy; - unsigned Align; - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->hasDefinitiveInitializer()) - return AliasAnalysis::UnknownSize; - AccessTy = GV->getType()->getElementType(); - Align = GV->getAlignment(); - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - if (!AI->isArrayAllocation()) - AccessTy = AI->getType()->getElementType(); - else - return AliasAnalysis::UnknownSize; - Align = AI->getAlignment(); - } else if (const CallInst* CI = extractMallocCall(V)) { - if (!RoundToAlign && !isArrayMalloc(V, &TD)) - // The size is the argument to the malloc call. - if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) - return C->getZExtValue(); - return AliasAnalysis::UnknownSize; - } else if (const Argument *A = dyn_cast<Argument>(V)) { - if (A->hasByValAttr()) { - AccessTy = cast<PointerType>(A->getType())->getElementType(); - Align = A->getParamAlignment(); - } else { - return AliasAnalysis::UnknownSize; - } - } else { - return AliasAnalysis::UnknownSize; - } - - if (!AccessTy->isSized()) - return AliasAnalysis::UnknownSize; - - uint64_t Size = TD.getTypeAllocSize(AccessTy); - // If there is an explicitly specified alignment, and we need to - // take alignment into account, round up the size. (If the alignment - // is implicit, getTypeAllocSize is sufficient.) 
- if (RoundToAlign && Align) - Size = RoundUpToAlignment(Size, Align); - - return Size; + uint64_t Size; + if (getObjectSize(V, Size, &TD, RoundToAlign)) + return Size; + return AliasAnalysis::UnknownSize; } /// isObjectSmallerThan - Return true if we can prove that the object specified diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 2e3ec8bebc..96e68b4199 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -12,9 +12,7 @@ add_llvm_library(LLVMAnalysis CaptureTracking.cpp CodeMetrics.cpp ConstantFolding.cpp - DIBuilder.cpp DbgInfoPrinter.cpp - DebugInfo.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp @@ -59,4 +57,6 @@ add_llvm_library(LLVMAnalysis ValueTracking.cpp ) +add_dependencies(LLVMAnalysis intrinsics_gen) + add_subdirectory(IPA) diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index cd832abeba..41cd34c07b 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -16,14 +16,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 8ffef29870..34d6d1bdd4 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -5,3 +5,5 @@ add_llvm_library(LLVMipa GlobalsModRef.cpp IPA.cpp ) + +add_dependencies(LLVMipa intrinsics_gen) diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 963da75234..449b7ee87b 100644 --- 
a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -246,7 +246,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { CallSite CS(cast<Value>(I)); - if (!CS || isa<IntrinsicInst>(I)) continue; + if (!CS) continue; + Function *Callee = CS.getCalledFunction(); + if (Callee && Callee->isIntrinsic()) continue; // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index c1d8e3e65a..22f6e96b53 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -329,15 +329,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); - if (isMalloc(Ptr)) { - // Okay, easy case. - } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) { - Function *F = CI->getCalledFunction(); - if (!F || !F->isDeclaration()) return false; // Too hard to analyze. - if (F->getName() != "calloc") return false; // Not calloc. - } else { + if (!isAllocLikeFn(Ptr)) return false; // Too hard to analyze. - } // Analyze all uses of the allocation. If any of them are used in a // non-simple way (e.g. stored to another global) bail out. @@ -454,19 +447,18 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { for (inst_iterator II = inst_begin(SCC[i]->getFunction()), E = inst_end(SCC[i]->getFunction()); II != E && FunctionEffect != ModRef; ++II) - if (isa<LoadInst>(*II)) { + if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) { FunctionEffect |= Ref; - if (cast<LoadInst>(*II).isVolatile()) + if (LI->isVolatile()) // Volatile loads may have side-effects, so mark them as writing // memory (for example, a flag inside the processor). 
FunctionEffect |= Mod; - } else if (isa<StoreInst>(*II)) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) { FunctionEffect |= Mod; - if (cast<StoreInst>(*II).isVolatile()) + if (SI->isVolatile()) // Treat volatile stores as reading memory somewhere. FunctionEffect |= Ref; - } else if (isMalloc(&cast<Instruction>(*II)) || - isFreeCall(&cast<Instruction>(*II))) { + } else if (isAllocationFn(&*II) || isFreeCall(&*II)) { FunctionEffect |= ModRef; } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) { // The callgraph doesn't include intrinsic calls. diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 7539d93862..9140786a1b 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -172,7 +172,7 @@ public: if (NewR.isEmptySet()) return markOverdefined(); - bool changed = Range == NewR; + bool changed = Range != NewR; Range = NewR; return changed; } @@ -457,8 +457,10 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { void LazyValueInfoCache::solve() { while (!BlockValueStack.empty()) { std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); - if (solveBlockValue(e.second, e.first)) + if (solveBlockValue(e.second, e.first)) { + assert(BlockValueStack.top() == e); BlockValueStack.pop(); + } } } @@ -766,15 +768,10 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, return true; } -/// getEdgeValue - This method attempts to infer more complex -bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, LVILatticeVal &Result) { - // If already a constant, there is nothing to compute. - if (Constant *VC = dyn_cast<Constant>(Val)) { - Result = LVILatticeVal::get(VC); - return true; - } - +/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if +/// Val is not constrained on the edge. 
+static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { @@ -827,25 +824,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // If we're interested in the false dest, invert the condition. if (!isTrueDest) TrueValues = TrueValues.inverse(); - - // Figure out the possible values of the query BEFORE this branch. - if (!hasBlockValue(Val, BBFrom)) { - BlockValueStack.push(std::make_pair(BBFrom, Val)); - return false; - } - - LVILatticeVal InBlock = getBlockValue(Val, BBFrom); - if (!InBlock.isConstantRange()) { - Result = LVILatticeVal::getRange(TrueValues); - return true; - } - - // Find all potential values that satisfy both the input and output - // conditions. - ConstantRange PossibleValues = - TrueValues.intersectWith(InBlock.getConstantRange()); - - Result = LVILatticeVal::getRange(PossibleValues); + + Result = LVILatticeVal::getRange(TrueValues); return true; } } @@ -855,35 +835,71 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // If the edge was formed by a switch on the value, then we may know exactly // what it is. if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { - if (SI->getCondition() == Val) { - // We don't know anything in the default case. 
- if (SI->getDefaultDest() == BBTo) { - Result.markOverdefined(); - return true; - } - - unsigned BitWidth = Val->getType()->getIntegerBitWidth(); - ConstantRange EdgesVals(BitWidth, false/*isFullSet*/); - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - if (i.getCaseSuccessor() != BBTo) continue; - ConstantRange EdgeVal(i.getCaseValue()->getValue()); + if (SI->getCondition() != Val) + return false; + + bool DefaultCase = SI->getDefaultDest() == BBTo; + unsigned BitWidth = Val->getType()->getIntegerBitWidth(); + ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/); + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + ConstantRange EdgeVal(i.getCaseValue()->getValue()); + if (DefaultCase) + EdgesVals = EdgesVals.difference(EdgeVal); + else if (i.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); - } - Result = LVILatticeVal::getRange(EdgesVals); - return true; } - } - - // Otherwise see if the value is known in the block. - if (hasBlockValue(Val, BBFrom)) { - Result = getBlockValue(Val, BBFrom); + Result = LVILatticeVal::getRange(EdgesVals); return true; } - BlockValueStack.push(std::make_pair(BBFrom, Val)); return false; } +/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at +/// the basic block if the edge does not constraint Val. +bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) { + Result = LVILatticeVal::get(VC); + return true; + } + + if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) { + if (!Result.isConstantRange() || + Result.getConstantRange().getSingleElement()) + return true; + + // FIXME: this check should be moved to the beginning of the function when + // LVI better supports recursive values. 
Even for the single value case, we + // can intersect to detect dead code (an empty range). + if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + // Try to intersect ranges of the BB and the constraint on the edge. + LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + if (!InBlock.isConstantRange()) + return true; + + ConstantRange Range = + Result.getConstantRange().intersectWith(InBlock.getConstantRange()); + Result = LVILatticeVal::getRange(Range); + return true; + } + + if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + // if we couldn't compute the value on the edge, use the value from the BB + Result = getBlockValue(Val, BBFrom); + return true; +} + LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index f7a60a1737..20c33a3d9d 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" @@ -29,6 +30,10 @@ #include <algorithm> using namespace llvm; +// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops. +template class llvm::LoopBase<BasicBlock, Loop>; +template class llvm::LoopInfoBase<BasicBlock, Loop>; + // Always verify loopinfo if expensive checking is enabled. 
#ifdef XDEBUG static bool VerifyLoopInfo = true; @@ -507,7 +512,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { // bool LoopInfo::runOnFunction(Function &) { releaseMemory(); - LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update + LI.Analyze(getAnalysis<DominatorTree>().getBase()); return false; } @@ -589,9 +594,6 @@ void LoopInfo::verifyAnalysis() const { } // Verify that blocks are mapped to valid loops. - // - // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we - // could also verify that the blocks are still in the correct loops. for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), E = LI.BBMap.end(); I != E; ++I) { assert(Loops.count(I->second) && "orphaned loop"); diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index aba700ac5c..1540112fe1 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -162,7 +162,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { // Recurse through all subloops and all loops into LQ. static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { LQ.push_back(L); - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) addLoopIntoQueue(*I, LQ); } @@ -183,8 +183,12 @@ bool LPPassManager::runOnFunction(Function &F) { // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); - // Populate Loop Queue - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + // Populate the loop queue in reverse program order. There is no clear need to + // process sibling loops in either forward or reverse order. There may be some + // advantage in deleting uses in a later loop before optimizing the + // definitions in an earlier loop. If we find a clear reason to process in + // forward order, then a forward variant of LoopPassManager should be created. 
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) addLoopIntoQueue(*I, LQ); if (LQ.empty()) // No loops, skip calling finalizers diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 9695ae1dec..b60b728b91 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -12,80 +12,168 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "memory-builtins" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Constants.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// malloc Call Utility Functions. -// +enum AllocType { + MallocLike = 1<<0, // allocates + CallocLike = 1<<1, // allocates + bzero + ReallocLike = 1<<2, // reallocates + StrDupLike = 1<<3, + AllocLike = MallocLike | CallocLike | StrDupLike, + AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike +}; + +struct AllocFnsTy { + const char *Name; + AllocType AllocTy; + unsigned char NumParams; + // First and Second size parameters (or -1 if unused) + signed char FstParam, SndParam; +}; + +// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to +// know which functions are nounwind, noalias, nocapture parameters, etc. 
+static const AllocFnsTy AllocationFnData[] = { + {"malloc", MallocLike, 1, 0, -1}, + {"valloc", MallocLike, 1, 0, -1}, + {"_Znwj", MallocLike, 1, 0, -1}, // new(unsigned int) + {"_ZnwjRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {"_Znwm", MallocLike, 1, 0, -1}, // new(unsigned long) + {"_ZnwmRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {"_Znaj", MallocLike, 1, 0, -1}, // new[](unsigned int) + {"_ZnajRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {"_Znam", MallocLike, 1, 0, -1}, // new[](unsigned long) + {"_ZnamRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {"posix_memalign", MallocLike, 3, 2, -1}, + {"calloc", CallocLike, 2, 0, 1}, + {"realloc", ReallocLike, 2, 1, -1}, + {"reallocf", ReallocLike, 2, 1, -1}, + {"strdup", StrDupLike, 1, -1, -1}, + {"strndup", StrDupLike, 2, -1, -1} +}; + + +static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { + if (LookThroughBitCast) + V = V->stripPointerCasts(); + + CallSite CS(const_cast<Value*>(V)); + if (!CS.getInstruction()) + return 0; -/// isMalloc - Returns true if the value is either a malloc call or a -/// bitcast of the result of a malloc call. -bool llvm::isMalloc(const Value *I) { - return extractMallocCall(I) || extractMallocCallFromBitCast(I); + Function *Callee = CS.getCalledFunction(); + if (!Callee || !Callee->isDeclaration()) + return 0; + return Callee; } -static bool isMallocCall(const CallInst *CI) { - if (!CI) - return false; +/// \brief Returns the allocation data for the given value if it is a call to a +/// known allocation function, and NULL otherwise. 
+static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + bool LookThroughBitCast = false) { + Function *Callee = getCalledFunction(V, LookThroughBitCast); + if (!Callee) + return 0; - Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return false; - if (Callee->getName() != "malloc" && - Callee->getName() != "_Znwj" && // operator new(unsigned int) - Callee->getName() != "_Znwm" && // operator new(unsigned long) - Callee->getName() != "_Znaj" && // operator new[](unsigned int) - Callee->getName() != "_Znam") // operator new[](unsigned long) - return false; + unsigned i = 0; + bool found = false; + for ( ; i < array_lengthof(AllocationFnData); ++i) { + if (Callee->getName() == AllocationFnData[i].Name) { + found = true; + break; + } + } + if (!found) + return 0; - // Check malloc prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin - // attribute will exist. + const AllocFnsTy *FnData = &AllocationFnData[i]; + if ((FnData->AllocTy & AllocTy) == 0) + return 0; + + // Check function prototype. + // FIXME: Check the nobuiltin metadata?? (PR5130) + int FstParam = FnData->FstParam; + int SndParam = FnData->SndParam; FunctionType *FTy = Callee->getFunctionType(); - return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && - FTy->getNumParams() == 1 && - (FTy->getParamType(0)->isIntegerTy(32) || - FTy->getParamType(0)->isIntegerTy(64)); + + if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == FnData->NumParams && + (FstParam < 0 || + (FTy->getParamType(FstParam)->isIntegerTy(32) || + FTy->getParamType(FstParam)->isIntegerTy(64))) && + (SndParam < 0 || + FTy->getParamType(SndParam)->isIntegerTy(32) || + FTy->getParamType(SndParam)->isIntegerTy(64))) + return FnData; + return 0; } -/// extractMallocCall - Returns the corresponding CallInst if the instruction -/// is a malloc call. 
Since CallInst::CreateMalloc() only creates calls, we -/// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I) { - const CallInst *CI = dyn_cast<CallInst>(I); - return (isMallocCall(CI)) ? CI : NULL; +static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { + ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); + return CS && CS.hasFnAttr(Attribute::NoAlias); } -CallInst *llvm::extractMallocCall(Value *I) { - CallInst *CI = dyn_cast<CallInst>(I); - return (isMallocCall(CI)) ? CI : NULL; + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup +/// like). +bool llvm::isAllocationFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, LookThroughBitCast); } -static bool isBitCastOfMallocCall(const BitCastInst *BCI) { - if (!BCI) - return false; - - return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0))); +/// \brief Tests if a value is a call or invoke to a function that returns a +/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). +bool llvm::isNoAliasFn(const Value *V, bool LookThroughBitCast) { + // it's safe to consider realloc as noalias since accessing the original + // pointer is undefined behavior + return isAllocationFn(V, LookThroughBitCast) || + hasNoAliasAttr(V, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory (such as malloc). +bool llvm::isMallocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, LookThroughBitCast); } -/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the -/// instruction is a bitcast of the result of a malloc call. -CallInst *llvm::extractMallocCallFromBitCast(Value *I) { - BitCastInst *BCI = dyn_cast<BitCastInst>(I); - return (isBitCastOfMallocCall(BCI)) ? 
cast<CallInst>(BCI->getOperand(0)) - : NULL; +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates zero-filled memory (such as calloc). +bool llvm::isCallocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, LookThroughBitCast); } -const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) { - const BitCastInst *BCI = dyn_cast<BitCastInst>(I); - return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) - : NULL; +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory (either malloc, calloc, or strdup like). +bool llvm::isAllocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// reallocates memory (such as realloc). +bool llvm::isReallocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, LookThroughBitCast); +} + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst *llvm::extractMallocCall(const Value *I) { + return isMallocLikeFn(I) ? dyn_cast<CallInst>(I) : 0; } static Value *computeArraySize(const CallInst *CI, const TargetData *TD, @@ -134,7 +222,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. PointerType *llvm::getMallocType(const CallInst *CI) { - assert(isMalloc(CI) && "getMallocType and not malloc call"); + assert(isMallocLikeFn(CI) && "getMallocType and not malloc call"); PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; @@ -176,54 +264,18 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI) { /// determined. 
Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, bool LookThroughSExt) { - assert(isMalloc(CI) && "getMallocArraySize and not malloc call"); + assert(isMallocLikeFn(CI) && "getMallocArraySize and not malloc call"); return computeArraySize(CI, TD, LookThroughSExt); } -//===----------------------------------------------------------------------===// -// calloc Call Utility Functions. -// - -static bool isCallocCall(const CallInst *CI) { - if (!CI) - return false; - - Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return false; - if (Callee->getName() != "calloc") - return false; - - // Check malloc prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin - // attribute exists. - FunctionType *FTy = Callee->getFunctionType(); - return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && - FTy->getNumParams() == 2 && - ((FTy->getParamType(0)->isIntegerTy(32) && - FTy->getParamType(1)->isIntegerTy(32)) || - (FTy->getParamType(0)->isIntegerTy(64) && - FTy->getParamType(1)->isIntegerTy(64))); -} - /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. const CallInst *llvm::extractCallocCall(const Value *I) { - const CallInst *CI = dyn_cast<CallInst>(I); - return isCallocCall(CI) ? CI : 0; -} - -CallInst *llvm::extractCallocCall(Value *I) { - CallInst *CI = dyn_cast<CallInst>(I); - return isCallocCall(CI) ? CI : 0; + return isCallocLikeFn(I) ? cast<CallInst>(I) : 0; } -//===----------------------------------------------------------------------===// -// free Call Utility Functions. 
-// - /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I) { const CallInst *CI = dyn_cast<CallInst>(I); @@ -251,3 +303,405 @@ const CallInst *llvm::isFreeCall(const Value *I) { return CI; } + + + +//===----------------------------------------------------------------------===// +// Utility functions to compute size of objects. +// + + +/// \brief Compute the size of the object pointed by Ptr. Returns true and the +/// object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); + if (!Visitor.bothKnown(Data)) + return false; + + APInt ObjSize = Data.first, Offset = Data.second; + // check for overflow + if (Offset.slt(0) || ObjSize.ult(Offset)) + Size = 0; + else + Size = (ObjSize - Offset).getZExtValue(); + return true; +} + + +STATISTIC(ObjectVisitorArgument, + "Number of arguments with unsolved size and offset"); +STATISTIC(ObjectVisitorLoad, + "Number of load instructions with unsolved size and offset"); + + +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { + if (RoundToAlign && Align) + return APInt(IntTyBits, RoundUpToAlignment(Size.getZExtValue(), Align)); + return Size; +} + +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, + LLVMContext &Context, + bool RoundToAlign) +: TD(TD), RoundToAlign(RoundToAlign) { + IntegerType *IntTy = TD->getIntPtrType(Context); + IntTyBits = IntTy->getBitWidth(); + Zero = APInt::getNullValue(IntTyBits); +} + +SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + V = V->stripPointerCasts(); + + if (GEPOperator *GEP = 
dyn_cast<GEPOperator>(V)) + return visitGEPOperator(*GEP); + if (Instruction *I = dyn_cast<Instruction>(V)) + return visit(*I); + if (Argument *A = dyn_cast<Argument>(V)) + return visitArgument(*A); + if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) + return visitConstantPointerNull(*P); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return visitGlobalVariable(*GV); + if (UndefValue *UV = dyn_cast<UndefValue>(V)) + return visitUndefValue(*UV); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + return unknown(); // clueless + + DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V + << '\n'); + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType())); + if (!I.isArrayAllocation()) + return std::make_pair(align(Size, I.getAlignment()), Zero); + + Value *ArraySize = I.getArraySize(); + if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { + Size *= C->getValue().zextOrSelf(IntTyBits); + return std::make_pair(align(Size, I.getAlignment()), Zero); + } + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { + // no interprocedural analysis is done at the moment + if (!A.hasByValAttr()) { + ++ObjectVisitorArgument; + return unknown(); + } + PointerType *PT = cast<PointerType>(A.getType()); + APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType())); + return std::make_pair(align(Size, A.getParamAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + // TODO + return unknown(); + } + + ConstantInt *Arg = 
dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt Size = Arg->getValue().zextOrSelf(IntTyBits); + // size determined by just 1 parameter + if (FnData->SndParam < 0) + return std::make_pair(Size, Zero); + + Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam)); + if (!Arg) + return unknown(); + + Size *= Arg->getValue().zextOrSelf(IntTyBits); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { + // Easy cases were already folded by previous passes. 
+ return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetType PtrData = compute(GEP.getPointerOperand()); + if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) + return unknown(); + + SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end()); + APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops)); + return std::make_pair(PtrData.first, PtrData.second + Offset); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ + if (!GV.hasDefinitiveInitializer()) + return unknown(); + + APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType())); + return std::make_pair(align(Size, GV.getAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { + ++ObjectVisitorLoad; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. 
+ return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { + SizeOffsetType TrueSide = compute(I.getTrueValue()); + SizeOffsetType FalseSide = compute(I.getFalseValue()); + if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide) + return TrueSide; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n'); + return unknown(); +} + + +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, + LLVMContext &Context) +: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)), +Visitor(TD, Context) { + IntTy = TD->getIntPtrType(Context); + Zero = ConstantInt::get(IntTy, 0); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { + SizeOffsetEvalType Result = compute_(V); + + if (!bothKnown(Result)) { + // erase everything that was computed in this iteration from the cache, so + // that no dangling references are left behind. We could be a bit smarter if + // we kept a dependency graph. It's probably not worth the complexity. 
+ for (PtrSetTy::iterator I=SeenVals.begin(), E=SeenVals.end(); I != E; ++I) { + CacheMapTy::iterator CacheIt = CacheMap.find(*I); + // non-computable results can be safely cached + if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second)) + CacheMap.erase(CacheIt); + } + } + + SeenVals.clear(); + return Result; +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { + SizeOffsetType Const = Visitor.compute(V); + if (Visitor.bothKnown(Const)) + return std::make_pair(ConstantInt::get(Context, Const.first), + ConstantInt::get(Context, Const.second)); + + V = V->stripPointerCasts(); + + // check cache + CacheMapTy::iterator CacheIt = CacheMap.find(V); + if (CacheIt != CacheMap.end()) + return CacheIt->second; + + // always generate code immediately before the instruction being + // processed, so that the generated code dominates the same BBs + Instruction *PrevInsertPoint = Builder.GetInsertPoint(); + if (Instruction *I = dyn_cast<Instruction>(V)) + Builder.SetInsertPoint(I); + + // record the pointers that were handled in this run, so that they can be + // cleaned later if something fails + SeenVals.insert(V); + + // now compute the size and offset + SizeOffsetEvalType Result; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + Result = visitGEPOperator(*GEP); + } else if (Instruction *I = dyn_cast<Instruction>(V)) { + Result = visit(*I); + } else if (isa<Argument>(V) || + (isa<ConstantExpr>(V) && + cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) || + isa<GlobalVariable>(V)) { + // ignore values where we cannot do more than what ObjectSizeVisitor can + Result = unknown(); + } else { + DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " + << *V << '\n'); + Result = unknown(); + } + + if (PrevInsertPoint) + Builder.SetInsertPoint(PrevInsertPoint); + + // Don't reuse CacheIt since it may be invalid at this point. 
+ CacheMap[V] = Result; + return Result; +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + // must be a VLA + assert(I.isArrayAllocation()); + Value *ArraySize = I.getArraySize(); + Value *Size = ConstantInt::get(ArraySize->getType(), + TD->getTypeAllocSize(I.getAllocatedType())); + Size = Builder.CreateMul(Size, ArraySize); + return std::make_pair(Size, Zero); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + // TODO + return unknown(); + } + + Value *FirstArg = CS.getArgument(FnData->FstParam); + FirstArg = Builder.CreateZExt(FirstArg, IntTy); + if (FnData->SndParam < 0) + return std::make_pair(FirstArg, Zero); + + Value *SecondArg = CS.getArgument(FnData->SndParam); + SecondArg = Builder.CreateZExt(SecondArg, IntTy); + Value *Size = Builder.CreateMul(FirstArg, SecondArg); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand()); + if (!bothKnown(PtrData)) + return unknown(); + + Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP); + Offset = Builder.CreateAdd(PtrData.second, Offset); + return std::make_pair(PtrData.first, Offset); +} + +SizeOffsetEvalType 
ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) { + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { + // create 2 PHIs: one for size and another for offset + PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + + // insert right away in the cache to handle recursive PHIs + CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI); + + // compute offset/size for each PHI incoming pointer + for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { + Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); + + if (!bothKnown(EdgeData)) { + OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); + OffsetPHI->eraseFromParent(); + SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); + SizePHI->eraseFromParent(); + return unknown(); + } + SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i)); + OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i)); + } + return std::make_pair(SizePHI, OffsetPHI); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { + SizeOffsetEvalType TrueSide = compute_(I.getTrueValue()); + SizeOffsetEvalType FalseSide = compute_(I.getFalseValue()); + + if (!bothKnown(TrueSide) || !bothKnown(FalseSide)) + return unknown(); + if (TrueSide == FalseSide) + return TrueSide; + + Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first, + FalseSide.first); + Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second, + FalseSide.second); + return std::make_pair(Size, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I 
<<'\n'); + return unknown(); +} diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 21ab7a8d38..7fb154d68a 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -474,8 +474,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - if (isa<AllocaInst>(Inst) || - (isa<CallInst>(Inst) && extractMallocCall(Inst))) { + if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index e7e999cebe..f8c7514819 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -16,10 +16,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Pass.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 886812be5f..b8c92f83d9 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -2726,7 +2726,7 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - ValueExprMapType::const_iterator I = ValueExprMap.find(V); + ValueExprMapType::const_iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) 
return I->second; const SCEV *S = createSCEV(V); @@ -2963,7 +2963,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast<Value *>(I)); + ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3020,7 +3020,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find(PN) == ValueExprMap.end() && + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && "PHI node already processed?"); ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); @@ -4084,7 +4084,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast<Value *>(I)); + ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -4135,7 +4135,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4168,7 +4169,8 @@ void ScalarEvolution::forgetValue(Value *V) { I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast<Value *>(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -5484,7 +5486,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // to 0, 
it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); - if (StepC == 0) + if (StepC == 0 || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index dabcbaa9e8..670c1bbe98 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -474,6 +474,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(extern_weak); KEYWORD(external); KEYWORD(thread_local); + KEYWORD(localdynamic); + KEYWORD(initialexec); + KEYWORD(localexec); KEYWORD(zeroinitializer); KEYWORD(undef); KEYWORD(null); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index fe415615d8..095b7c5f67 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -645,12 +645,13 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, unsigned Visibility) { unsigned AddrSpace; - bool ThreadLocal, IsConstant, UnnamedAddr; + bool IsConstant, UnnamedAddr; + GlobalVariable::ThreadLocalMode TLM; LocTy UnnamedAddrLoc; LocTy TyLoc; Type *Ty = 0; - if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) || + if (ParseOptionalThreadLocal(TLM) || ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || @@ -691,7 +692,8 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (GV == 0) { GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0, - Name, 0, false, AddrSpace); + Name, 0, GlobalVariable::NotThreadLocal, + AddrSpace); } else { if (GV->getType()->getElementType() != Ty) return Error(TyLoc, @@ -710,7 +712,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setConstant(IsConstant); GV->setLinkage((GlobalValue::LinkageTypes)Linkage); 
GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); - GV->setThreadLocal(ThreadLocal); + GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); // Parse attributes on the global. @@ -858,6 +860,46 @@ bool LLParser::ParseUInt32(unsigned &Val) { return false; } +/// ParseTLSModel +/// := 'localdynamic' +/// := 'initialexec' +/// := 'localexec' +bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) { + switch (Lex.getKind()) { + default: + return TokError("expected localdynamic, initialexec or localexec"); + case lltok::kw_localdynamic: + TLM = GlobalVariable::LocalDynamicTLSModel; + break; + case lltok::kw_initialexec: + TLM = GlobalVariable::InitialExecTLSModel; + break; + case lltok::kw_localexec: + TLM = GlobalVariable::LocalExecTLSModel; + break; + } + + Lex.Lex(); + return false; +} + +/// ParseOptionalThreadLocal +/// := /*empty*/ +/// := 'thread_local' +/// := 'thread_local' '(' tlsmodel ')' +bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) { + TLM = GlobalVariable::NotThreadLocal; + if (!EatIfPresent(lltok::kw_thread_local)) + return false; + + TLM = GlobalVariable::GeneralDynamicTLSModel; + if (Lex.getKind() == lltok::lparen) { + Lex.Lex(); + return ParseTLSModel(TLM) || + ParseToken(lltok::rparen, "expected ')' after thread local model"); + } + return false; +} /// ParseOptionalAddrSpace /// := /*empty*/ diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index dda8808381..257c726229 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -171,6 +171,9 @@ namespace llvm { Loc = Lex.getLoc(); return ParseUInt32(Val); } + + bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); + bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); diff --git a/lib/AsmParser/LLToken.h 
b/lib/AsmParser/LLToken.h index adf5d4f4d0..0461e7b63a 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -44,13 +44,14 @@ namespace lltok { kw_unnamed_addr, kw_extern_weak, kw_external, kw_thread_local, + kw_localdynamic, kw_initialexec, kw_localexec, kw_zeroinitializer, kw_undef, kw_null, kw_to, kw_tail, kw_target, kw_triple, - kw_unwind, + kw_unwind, kw_deplibs, kw_datalayout, kw_volatile, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 60866ab832..295d609f89 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -102,6 +102,17 @@ static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) { } } +static GlobalVariable::ThreadLocalMode GetDecodedThreadLocalMode(unsigned Val) { + switch (Val) { + case 0: return GlobalVariable::NotThreadLocal; + default: // Map unknown non-zero value to general dynamic. + case 1: return GlobalVariable::GeneralDynamicTLSModel; + case 2: return GlobalVariable::LocalDynamicTLSModel; + case 3: return GlobalVariable::InitialExecTLSModel; + case 4: return GlobalVariable::LocalExecTLSModel; + } +} + static int GetDecodedCastOpcode(unsigned Val) { switch (Val) { default: return -1; @@ -1552,9 +1563,10 @@ bool BitcodeReader::ParseModule(bool Resume) { GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; if (Record.size() > 6) Visibility = GetDecodedVisibility(Record[6]); - bool isThreadLocal = false; + + GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; if (Record.size() > 7) - isThreadLocal = Record[7]; + TLM = GetDecodedThreadLocalMode(Record[7]); bool UnnamedAddr = false; if (Record.size() > 8) @@ -1562,12 +1574,11 @@ bool BitcodeReader::ParseModule(bool Resume) { GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, - isThreadLocal, AddressSpace); + TLM, AddressSpace); NewGV->setAlignment(Alignment); if (!Section.empty()) 
NewGV->setSection(Section); NewGV->setVisibility(Visibility); - NewGV->setThreadLocal(isThreadLocal); NewGV->setUnnamedAddr(UnnamedAddr); ValueList.push_back(NewGV); diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt index 693d4310b8..dfe7e1065c 100644 --- a/lib/Bitcode/Reader/CMakeLists.txt +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -2,3 +2,5 @@ add_llvm_library(LLVMBitReader BitReader.cpp BitcodeReader.cpp ) + +add_dependencies(LLVMBitReader intrinsics_gen) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 6526b012d8..5b1725f550 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -379,6 +379,17 @@ static unsigned getEncodedVisibility(const GlobalValue *GV) { llvm_unreachable("Invalid visibility"); } +static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) { + switch (GV->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: return 0; + case GlobalVariable::GeneralDynamicTLSModel: return 1; + case GlobalVariable::LocalDynamicTLSModel: return 2; + case GlobalVariable::InitialExecTLSModel: return 3; + case GlobalVariable::LocalExecTLSModel: return 4; + } + llvm_unreachable("Invalid TLS model"); +} + // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. 
static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, @@ -487,7 +498,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, GV->getVisibility() != GlobalValue::DefaultVisibility || GV->hasUnnamedAddr()) { Vals.push_back(getEncodedVisibility(GV)); - Vals.push_back(GV->isThreadLocal()); + Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV->hasUnnamedAddr()); } else { AbbrevToUse = SimpleGVarAbbrev; @@ -1157,19 +1168,38 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals64.push_back(SI.getNumCases()); for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - IntegersSubset CaseRanges = i.getCaseValueEx(); - Vals64.push_back(CaseRanges.getNumItems()); - for (unsigned ri = 0, rn = CaseRanges.getNumItems(); ri != rn; ++ri) { - IntegersSubset::Range r = CaseRanges.getItem(ri); - bool IsSingleNumber = r.isSingleNumber(); - - Vals64.push_back(IsSingleNumber); - - unsigned Code, Abbrev; // will unused. + IntegersSubset& CaseRanges = i.getCaseValueEx(); + unsigned Code, Abbrev; // will unused. 
+ + if (CaseRanges.isSingleNumber()) { + Vals64.push_back(1/*NumItems = 1*/); + Vals64.push_back(true/*IsSingleNumber = true*/); + EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true); + } else { - EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); - if (!IsSingleNumber) - EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); + Vals64.push_back(CaseRanges.getNumItems()); + + if (CaseRanges.isSingleNumbersOnly()) { + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + + Vals64.push_back(true/*IsSingleNumber = true*/); + + EmitAPInt(Vals64, Code, Abbrev, + CaseRanges.getSingleNumber(ri), true); + } + } else + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + IntegersSubset::Range r = CaseRanges.getItem(ri); + bool IsSingleNumber = CaseRanges.isSingleNumber(ri); + + Vals64.push_back(IsSingleNumber); + + EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); + if (!IsSingleNumber) + EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); + } } Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 01f75a78b7..84277604ef 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -629,7 +629,7 @@ bool AsmPrinter::needsSEHMoves() { } bool AsmPrinter::needsRelocationsForDwarfStringPool() const { - return MAI->doesDwarfUseRelocationsForStringPool(); + return 
MAI->doesDwarfUseRelocationsAcrossSections(); } void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { @@ -1445,13 +1445,14 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size) const { - // Emit Label+Offset - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), - MCConstantExpr::Create(Offset, OutContext), - OutContext); + // Emit Label+Offset (or just Label if Offset is zero) + const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); + if (Offset) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(Offset, OutContext), + OutContext); - OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); + OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 1844e44a47..db43b06c70 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -409,9 +409,28 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { - // Target doesn't support this yet! + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'c': // Substitute immediate value without immediate syntax + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'n': // Negate the immediate constant. 
+ if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << -MO.getImm(); + return false; + } + } return true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index bc1268c19e..9d17767f3b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -17,9 +17,9 @@ #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 45e407e27f..b4ff9e8d69 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,7 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/OwningPtr.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 734e7b9195..fa7fb1f8d0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -17,9 +17,10 @@ #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/DIBuilder.h" #include "llvm/Module.h" #include "llvm/Instructions.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -32,11 +33,10 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include 
"llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -594,7 +594,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, Asm->GetTempSymbol("section_line")); else @@ -1652,7 +1652,7 @@ void DwarfDebug::emitDIE(DIE *Die) { // DW_AT_range Value encodes offset in debug_range section. DIEInteger *V = cast<DIEInteger>(Values[i]); - if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) { Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, V->getValue(), 4); @@ -1665,10 +1665,14 @@ void DwarfDebug::emitDIE(DIE *Die) { break; } case dwarf::DW_AT_location: { - if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) - Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); - else + if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelReference(L->getValue(), 4); + else + Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); + } else { Values[i]->EmitValue(Asm, Form); + } break; } case dwarf::DW_AT_accessibility: { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index d153c0dd0c..b16e947476 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,11 +14,11 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#include "DIE.h" +#include "llvm/DebugInfo.h" #include 
"llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Analysis/DebugInfo.h" -#include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -235,7 +235,7 @@ class DwarfDebug { /// ScopeVariables - Collection of dbg variables of a scope. DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables; - /// AbstractVariables - Collection on abstract variables. + /// AbstractVariables - Collection of abstract variables. DenseMap<const MDNode *, DbgVariable *> AbstractVariables; /// DotDebugLocEntries - Collection of DotDebugLocEntry. @@ -333,9 +333,6 @@ private: /// of the function. DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S); - /// constructScopeDIE - Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); @@ -526,9 +523,6 @@ public: // @LOCALMOD-END - /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP); - /// getStringPool - returns the entry into the start of the pool. 
MCSymbol *getStringPool(); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 34947cee32..a2ce7a004d 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -80,7 +80,6 @@ add_llvm_library(LLVMCodeGen RegisterCoalescer.cpp RegisterPressure.cpp RegisterScavenging.cpp - RenderMachineFunction.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp @@ -105,5 +104,7 @@ add_llvm_library(LLVMCodeGen VirtRegMap.cpp ) +add_dependencies(LLVMCodeGen intrinsics_gen) + add_subdirectory(SelectionDAG) add_subdirectory(AsmPrinter) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 3782a1d99a..939af3f0cc 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -39,18 +39,20 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { MachineFunctionPass::getAnalysisUsage(au); } -bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " - << fn.getFunction()->getName() << '\n'); - - LiveIntervals &lis = getAnalysis<LiveIntervals>(); - VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>()); - for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) { - LiveInterval &li = *I->second; - if (TargetRegisterInfo::isVirtualRegister(li.reg)) - vrai.CalculateWeightAndHint(li); + << MF.getFunction()->getName() << '\n'); + + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + VirtRegAuxInfo VRAI(MF, LIS, getAnalysis<MachineLoopInfo>()); + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); } return false; } diff --git a/lib/CodeGen/CallingConvLower.cpp 
b/lib/CodeGen/CallingConvLower.cpp index 81e237effb..89de59a12a 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -50,8 +50,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; - if (MF.getFrameInfo()->getMaxAlignment() < Align) - MF.getFrameInfo()->setMaxAlignment(Align); + MF.getFrameInfo()->ensureMaxAlignment(Align); TM.getTargetLowering()->HandleByVal(this, Size); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 2c7a427f39..7b015d850d 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -53,7 +53,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); - initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 9833097c8d..4c7f5d8c88 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1273,8 +1273,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << LIS.getInterval(Original) << '\n'); + << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() + << "\nFrom original " << LIS.getInterval(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 9b6d496b06..1541bf0c85 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ 
b/lib/CodeGen/InterferenceCache.cpp @@ -39,7 +39,7 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { unsigned E = PhysRegEntries[PhysReg]; if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { if (!Entries[E].valid(LIUArray, TRI)) - Entries[E].revalidate(); + Entries[E].revalidate(LIUArray, TRI); return &Entries[E]; } // No valid entry exists, pick the next round-robin entry. @@ -61,13 +61,15 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { } /// revalidate - LIU contents have changed, update tags. -void InterferenceCache::Entry::revalidate() { +void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { // Invalidate all block entries. ++Tag; // Invalidate all iterators. PrevPos = SlotIndex(); - for (unsigned i = 0, e = Aliases.size(); i != e; ++i) - Aliases[i].second = Aliases[i].first->getTag(); + unsigned i = 0; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) + RegUnits[i].VirtTag = LIUArray[*Units].getTag(); } void InterferenceCache::Entry::reset(unsigned physReg, @@ -79,28 +81,23 @@ void InterferenceCache::Entry::reset(unsigned physReg, ++Tag; PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); - Aliases.clear(); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { - LiveIntervalUnion *LIU = LIUArray + *AI; - Aliases.push_back(std::make_pair(LIU, LIU->getTag())); - } // Reset iterators. 
PrevPos = SlotIndex(); - unsigned e = Aliases.size(); - Iters.resize(e); - for (unsigned i = 0; i != e; ++i) - Iters[i].setMap(Aliases[i].first->getMap()); + RegUnits.clear(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + RegUnits.push_back(LIUArray[*Units]); + RegUnits.back().Fixed = &LIS->getRegUnit(*Units); + } } bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { - unsigned i = 0, e = Aliases.size(); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI, ++i) { - LiveIntervalUnion *LIU = LIUArray + *AI; - if (i == e || Aliases[i].first != LIU) + unsigned i = 0, e = RegUnits.size(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) { + if (i == e) return false; - if (LIU->changedSince(Aliases[i].second)) + if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag)) return false; } return i == e; @@ -112,12 +109,20 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Use advanceTo only when possible. if (PrevPos != Start) { - if (!PrevPos.isValid() || Start < PrevPos) - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].find(Start); - else - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].advanceTo(Start); + if (!PrevPos.isValid() || Start < PrevPos) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.find(Start); + RUI.FixedI = RUI.Fixed->find(Start); + } + } else { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.advanceTo(Start); + if (RUI.FixedI != RUI.Fixed->end()) + RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); + } + } PrevPos = Start; } @@ -129,9 +134,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); - // Check for first interference. 
- for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + // Check for first interference from virtregs. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid()) continue; SlotIndex StartI = I.start(); @@ -141,6 +146,19 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Same thing for fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::const_iterator I = RegUnits[i].FixedI; + LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + if (I == E) + continue; + SlotIndex StartI = I->start; + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + // Also check for register mask interference. RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); @@ -168,8 +186,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Check for last interference in block. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid() || I.start() >= Stop) continue; I.advanceTo(Stop); @@ -183,6 +201,23 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { ++I; } + // Fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::iterator &I = RegUnits[i].FixedI; + LiveInterval *LI = RegUnits[i].Fixed; + if (I == LI->end() || I->start >= Stop) + continue; + I = LI->advanceTo(I, Stop); + bool Backup = I == LI->end() || I->start >= Stop; + if (Backup) + --I; + SlotIndex StopI = I->end; + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + // Also check for register mask interference. SlotIndex Limit = BI->Last.isValid() ? 
BI->Last : Start; for (unsigned i = RegMaskSlots.size(); diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 485a325aa1..3c928a5086 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// InterferenceCache remembers per-block interference from LiveIntervalUnions, +// fixed RegUnit interference, and register masks. // //===----------------------------------------------------------------------===// @@ -59,14 +60,31 @@ class InterferenceCache { /// PrevPos - The previous position the iterators were moved to. SlotIndex PrevPos; - /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of - /// PhysReg. - SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases; + /// RegUnitInfo - Information tracked about each RegUnit in PhysReg. + /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos) + /// had just been called. + struct RegUnitInfo { + /// Iterator pointing into the LiveIntervalUnion containing virtual + /// register interference. + LiveIntervalUnion::SegmentIter VirtI; - typedef LiveIntervalUnion::SegmentIter Iter; + /// Tag of the LIU last time we looked. + unsigned VirtTag; - /// Iters - an iterator for each alias - SmallVector<Iter, 8> Iters; + /// Fixed interference in RegUnit. + LiveInterval *Fixed; + + /// Iterator pointing into the fixed RegUnit interference. + LiveInterval::iterator FixedI; + + RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) { + VirtI.setMap(LIU.getMap()); + } + }; + + /// Info for each RegUnit in PhysReg. It is very rare ofr a PHysReg to have + /// more than 4 RegUnits. + SmallVector<RegUnitInfo, 4> RegUnits; /// Blocks - Interference for each block in the function. 
SmallVector<BlockInterference, 8> Blocks; @@ -91,7 +109,7 @@ class InterferenceCache { bool hasRefs() const { return RefCount > 0; } - void revalidate(); + void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); /// valid - Return true if this is a valid entry for physReg. bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 8ccab9cd5d..ba447ce6d4 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Module.h" #include "llvm/Type.h" -#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; template <class ArgIt> diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 30963a2ea9..cac0c83bca 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/PassManager.h" +#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -78,40 +79,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } -/// Turn exception handling constructs into something the code generators can -/// handle. 
-static void addPassesToHandleExceptions(TargetMachine *TM, - PassManagerBase &PM) { - switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { - case ExceptionHandling::SjLj: - // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both - // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, - // catch info can get misplaced when a selector ends up more than one block - // removed from the parent invoke(s). This could happen when a landing - // pad is shared by multiple invokes and is also a target of a normal - // edge from elsewhere. - PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); - // FALLTHROUGH - case ExceptionHandling::DwarfCFI: - case ExceptionHandling::ARM: - case ExceptionHandling::Win64: - PM.add(createDwarfEHPass(TM)); - break; - case ExceptionHandling::None: - PM.add(createLowerInvokePass(TM->getTargetLowering())); - - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - break; - } -} - /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { // Targets may override createPassConfig to provide a target-specific sublass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); + PassConfig->setStartStopPasses(StartAfter, StopAfter); // Set PassConfig options provided by TargetMachine. 
PassConfig->setDisableVerify(DisableVerify); @@ -120,7 +96,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->addIRPasses(); - addPassesToHandleExceptions(TM, PM); + PassConfig->addPassesToHandleExceptions(); PassConfig->addISelPrepare(); @@ -155,12 +131,25 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, + StartAfter, StopAfter); if (!Context) return true; + if (StopAfter) { + // FIXME: The intent is that this should eventually write out a YAML file, + // containing the LLVM IR, the machine-level IR (when stopping after a + // machine-level pass), and whatever other information is needed to + // deserialize the code and resume compilation. For now, just write the + // LLVM IR. + PM.add(createPrintModulePass(&Out)); + return false; + } + if (hasMCSaveTempLabels()) Context->setAllowTemporaryLabels(false); @@ -244,7 +233,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Context) return true; @@ -264,7 +253,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, raw_ostream &Out, bool DisableVerify) { // Add common CodeGen passes. 
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Ctx) return true; diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index f1abcbb1dd..6b6b9d084e 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -16,8 +16,8 @@ #define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 5b7941f461..d631726538 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -23,9 +23,9 @@ #include "LiveDebugVariables.h" #include "VirtRegMap.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Metadata.h" #include "llvm/Value.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -243,7 +243,7 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. 
- void computeIntervals(MachineRegisterInfo &MRI, + void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -618,6 +618,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -634,15 +635,32 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; + if (!Loc.isReg()) { + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + continue; + } + // Register locations are constrained to where the register value is live. - if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { - LiveInterval *LI = &LIS.getInterval(Loc.getReg()); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + LiveInterval *LI = 0; + const VNInfo *VNI = 0; + if (LIS.hasInterval(Loc.getReg())) { + LI = &LIS.getInterval(Loc.getReg()); + VNI = LI->getVNInfoAt(Idx); + } SmallVector<SlotIndex, 16> Kills; extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); - addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); - } else - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + if (LI) + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + continue; + } + + // For physregs, use the live range of the first regunit as a guide. + unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); + LiveInterval *LI = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + // Don't track copies from physregs, it is too expensive. + extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. 
@@ -656,7 +674,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); - userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); userValues[i]->mapVirtRegs(this); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c5bee077ad..819707f59f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -27,12 +27,10 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "LiveRangeCalc.h" #include <algorithm> @@ -40,11 +38,6 @@ #include <cmath> using namespace llvm; -// Temporary option to enable regunit liveness. -static cl::opt<bool> LiveRegUnits("live-regunits", cl::Hidden); - -STATISTIC(numIntervals , "Number of original intervals"); - char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) @@ -62,8 +55,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); - if (LiveRegUnits) - AU.addRequiredTransitiveID(MachineDominatorsID); + AU.addRequiredTransitiveID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addPreserved<SlotIndexes>(); AU.addRequiredTransitive<SlotIndexes>(); @@ -81,11 +73,9 @@ LiveIntervals::~LiveIntervals() { void LiveIntervals::releaseMemory() { // Free the live intervals themselves. 
- for (DenseMap<unsigned, LiveInterval*>::iterator I = R2IMap.begin(), - E = R2IMap.end(); I != E; ++I) - delete I->second; - - R2IMap.clear(); + for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) + delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)]; + VirtRegIntervals.clear(); RegMaskSlots.clear(); RegMaskBits.clear(); RegMaskBlocks.clear(); @@ -109,20 +99,14 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { AA = &getAnalysis<AliasAnalysis>(); LV = &getAnalysis<LiveVariables>(); Indexes = &getAnalysis<SlotIndexes>(); - if (LiveRegUnits) - DomTree = &getAnalysis<MachineDominatorTree>(); - if (LiveRegUnits && !LRCalc) + DomTree = &getAnalysis<MachineDominatorTree>(); + if (!LRCalc) LRCalc = new LiveRangeCalc(); AllocatableRegs = TRI->getAllocatableSet(fn); ReservedRegs = TRI->getReservedRegs(fn); computeIntervals(); - - numIntervals += getNumIntervals(); - - if (LiveRegUnits) { - computeLiveInRegUnits(); - } + computeLiveInRegUnits(); DEBUG(dump()); return true; @@ -132,21 +116,17 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; - // Dump the physregs. - for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) - if (const LiveInterval *LI = R2IMap.lookup(Reg)) - OS << PrintReg(Reg, TRI) << '\t' << *LI << '\n'; - // Dump the regunits. for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) if (LiveInterval *LI = RegUnitIntervals[i]) OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; // Dump the virtregs. 
- for (unsigned Reg = 0, RegE = MRI->getNumVirtRegs(); Reg != RegE; ++Reg) - if (const LiveInterval *LI = - R2IMap.lookup(TargetRegisterInfo::index2VirtReg(Reg))) - OS << PrintReg(LI->reg) << '\t' << *LI << '\n'; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (hasInterval(Reg)) + OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + } printInstrs(OS); } @@ -254,8 +234,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, bool PHIJoin = LV->isPHIJoin(interval.reg); if (PHIJoin) { - // A phi join register is killed at the end of the MBB and revived as a new - // valno in the killing blocks. + // A phi join register is killed at the end of the MBB and revived as a + // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); ValNo->setHasPHIKill(true); @@ -266,7 +246,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); + LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), + ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR); } @@ -369,101 +350,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } -static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - const MachineBasicBlock* succ = *SI; - if (succ->isLiveIn(Reg)) - return true; - } - return false; -} - -void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - LiveInterval &interval) { - 
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI)); - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); - SlotIndex end = start; - - // If it is not used after definition, it is considered dead at - // the instruction defining it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - // For earlyclobbers, the defSlot was pushed back one; the extra - // advance below compensates. - if (MO.isDead()) { - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - goto exit; - } - - // If it is not dead on definition, it must be killed by a - // subsequent instruction. Hence its interval is: - // [defSlot(def), useSlot(kill)+1) - baseIndex = baseIndex.getNextIndex(); - while (++mi != MBB->end()) { - - if (mi->isDebugValue()) - continue; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = Indexes->getNextNonNullIndex(baseIndex); - - if (mi->killsRegister(interval.reg, TRI)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - goto exit; - } else { - int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,TRI); - if (DefIdx != -1) { - if (mi->isRegTiedToUseOperand(DefIdx)) { - // Two-address instruction. - end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); - } else { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that - // defines it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - } - goto exit; - } - } - - baseIndex = baseIndex.getNextIndex(); - } - - // If we get here the register *should* be live out. - assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); - - // FIXME: We need saner rules for reserved regs. - if (isReserved(interval.reg)) { - end = start.getDeadSlot(); - } else { - // Unreserved, unallocable registers like EFLAGS can be live across basic - // block boundaries. 
- assert(isRegLiveIntoSuccessor(MBB, interval.reg) && - "Unreserved reg not live-out?"); - end = getMBBEndIdx(MBB); - } -exit: - assert(start < end && "did not find end of interval?"); - - // Already exists? Extend old live interval. - VNInfo *ValNo = interval.getVNInfoAt(start); - bool Extend = ValNo != 0; - if (!Extend) - ValNo = interval.getNextValue(start, VNInfoAllocator); - LiveRange LR(start, end, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); -} - void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, SlotIndex MIIdx, @@ -472,93 +358,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg())); -} - -void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - SlotIndex MIIdx, - LiveInterval &interval) { - assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && - "Only physical registers can be live in."); - assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || - MBB->isLandingPad()) && - "Allocatable live-ins only valid for entry blocks and landing pads."); - - DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, TRI)); - - // Look for kills, if it reaches a def before it's killed, then it shouldn't - // be considered a livein. - MachineBasicBlock::iterator mi = MBB->begin(); - MachineBasicBlock::iterator E = MBB->end(); - // Skip over DBG_VALUE at the start of the MBB. - if (mi != E && mi->isDebugValue()) { - while (++mi != E && mi->isDebugValue()) - ; - if (mi == E) - // MBB is empty except for DBG_VALUE's. 
- return; - } - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = Indexes->getNextNonNullIndex(baseIndex); - - SlotIndex end = baseIndex; - bool SeenDefUse = false; - - while (mi != E) { - if (mi->killsRegister(interval.reg, TRI)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - SeenDefUse = true; - break; - } else if (mi->modifiesRegister(interval.reg, TRI)) { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - SeenDefUse = true; - break; - } - - while (++mi != E && mi->isDebugValue()) - // Skip over DBG_VALUE. - ; - if (mi != E) - baseIndex = Indexes->getNextNonNullIndex(baseIndex); - } - - // Live-in register might not be used at all. - if (!SeenDefUse) { - if (isAllocatable(interval.reg) || - !isRegLiveIntoSuccessor(MBB, interval.reg)) { - // Allocatable registers are never live through. - // Non-allocatable registers that aren't live into any successors also - // aren't live through. - DEBUG(dbgs() << " dead"); - return; - } else { - // If we get here the register is non-allocatable and live into some - // successor. We'll conservatively assume it's live-through. 
- DEBUG(dbgs() << " live through"); - end = getMBBEndIdx(MBB); - } - } - - SlotIndex defIdx = getMBBStartIdx(MBB); - assert(getInstructionFromIndex(defIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); - vni->setIsPHIDef(true); - LiveRange LR(start, end, vni); - - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); } /// computeIntervals - computes the live intervals for virtual @@ -586,12 +385,6 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "BB#" << MBB->getNumber() << ":\t\t# derived from " << MBB->getName() << "\n"); - // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); LI != LE; ++LI) { - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - } - // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) MIIndex = Indexes->getNextNonNullIndex(MIIndex); @@ -615,7 +408,7 @@ void LiveIntervals::computeIntervals() { continue; } - if (!MO.isReg() || !MO.getReg()) + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // handle register defs - build intervals @@ -883,13 +676,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // void LiveIntervals::addKillFlags() { - for (iterator I = begin(), E = end(); I != E; ++I) { - unsigned Reg = I->first; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - continue; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = I->second; + LiveInterval *LI = &getInterval(Reg); // Every instruction that kills Reg corresponds to a live range end point. 
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -1192,78 +983,44 @@ private: // TODO: Currently we're skipping uses that are reserved or have no // interval, but we're not updating their kills. This should be // fixed. - if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)) continue; - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - if (MO.isEarlyClobber()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); - assert(LR != 0 && "No EC range?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); - else - Internal.insert(std::make_pair(LI, LR)); - } else if (MO.isDead()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No dead-def range?"); - Internal.insert(std::make_pair(LI, LR)); - } else { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); - assert(LR && LR->end > OldIdx.getDeadSlot() && - "Non-dead-def should have live range exiting."); - Exiting.insert(std::make_pair(LI, LR)); - } + // Collect ranges for register units. These live ranges are computed on + // demand, so just skip any that haven't been computed yet. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) + collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx); + } else { + // Collect ranges for individual virtual registers. + collectRanges(MO, &LIS.getInterval(Reg), + Entering, Internal, Exiting, OldIdx); } } } - // Collect IntRangePairs for all operands of MI that may need fixing. 
- void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, - RangeSet& Exiting, SlotIndex MIStartIdx, - SlotIndex MIEndIdx) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // TODO: Currently we're skipping uses that are reserved or have no - // interval, but we're not updating their kills. This should be - // fixed. - if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) - continue; - - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); - assert(!MO.isDead() && "Dead-defs not allowed in bundles."); - LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); - assert(LR != 0 && "Internal ranges not allowed in bundles."); + void collectRanges(const MachineOperand &MO, LiveInterval *LI, + RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, + SlotIndex OldIdx) { + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No live range for def?"); + if (LR->end > OldIdx.getDeadSlot()) Exiting.insert(std::make_pair(LI, LR)); - } + else + Internal.insert(std::make_pair(LI, LR)); } } - BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges createBundleRanges(RangeSet& Entering, + RangeSet& Internal, + RangeSet& Exiting) { BundleRanges BR; for (RangeSet::iterator EI = Entering.begin(), EE = 
Entering.end(); @@ -1300,7 +1057,8 @@ private: return; // Bail out if we don't have kill flags on the old register. MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + assert(!NewKillMI->killsRegister(reg) && + "New kill instr is already a kill."); OldKillMI->clearRegisterKills(reg, &TRI); NewKillMI->addRegisterKilled(reg, &TRI); } @@ -1553,7 +1311,8 @@ void LiveIntervals::handleMove(MachineInstr* MI) { HME.moveAllRangesFrom(MI, OldIndex); } -void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, + MachineInstr* BundleStart) { SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); HMEditor HME(*this, *MRI, *TRI, NewIndex); HME.moveAllRangesInto(MI, BundleStart); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 434388b94e..261d860e01 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -82,12 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.isDef()) - continue; - // Reserved registers are OK. - if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) + if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; + // We can't remat physreg uses, unless it is a constant. 
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + continue; + return false; + } + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 61e1432b0e..cdb1776812 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -110,7 +110,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, // The BitVector is indexed by PhysReg, not register unit. // Regmask interference is more fine grained than regunits. // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8. - return !RegMaskUsable.empty() && !RegMaskUsable.test(PhysReg); + return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); } bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h index 4c3e7d4782..b3e2d7f4b4 100644 --- a/lib/CodeGen/LiveRegMatrix.h +++ b/lib/CodeGen/LiveRegMatrix.h @@ -124,7 +124,8 @@ public: /// Check for regmask interference only. /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. - bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg); + /// If PhysReg is null, check if VirtReg crosses any regmask operands. + bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); /// Check for regunit interference only. /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's @@ -136,6 +137,10 @@ public: /// This returns a reference to an internal Query data structure that is only /// valid until the next query() call. LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit); + + /// Directly access the live interval unions per regunit. + /// This returns an array indexed by the regunit number. 
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; } }; } // end namespace llvm diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index acf986ca61..348ed3a0f9 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -573,7 +573,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = MO.getReg(); if (MO.isUse()) { MO.setIsKill(false); - UseRegs.push_back(MOReg); + if (MO.readsReg()) + UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { MO.setIsDead(false); DefRegs.push_back(MOReg); @@ -729,8 +730,9 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] - .push_back(BBI->getOperand(i).getReg()); + if (BBI->getOperand(i).readsReg()) + PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI->getOperand(i).getReg()); } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 20b302926b..fa971e7c3a 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -272,11 +272,9 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } - if (Alignment) { + if (Alignment) OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) << " bytes)"; - Comma = ", "; - } OS << '\n'; diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 9ca0ad2e24..5a15f92a18 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -63,17 +63,13 @@ namespace { /// /// This is the 
datastructure representing a chain of consecutive blocks that /// are profitable to layout together in order to maximize fallthrough -/// probabilities. We also can use a block chain to represent a sequence of -/// basic blocks which have some external (correctness) requirement for -/// sequential layout. +/// probabilities and code locality. We also can use a block chain to represent +/// a sequence of basic blocks which have some external (correctness) +/// requirement for sequential layout. /// -/// Eventually, the block chains will form a directed graph over the function. -/// We provide an SCC-supporting-iterator in order to quicky build and walk the -/// SCCs of block chains within a function. -/// -/// The block chains also have support for calculating and caching probability -/// information related to the chain itself versus other chains. This is used -/// for ranking during the final layout of block chains. +/// Chains can be built around a single basic block and can be merged to grow +/// them. They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. class BlockChain { /// \brief The sequence of blocks belonging to this chain. /// @@ -179,10 +175,11 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief Allocator and owner of BlockChain structures. /// - /// We build BlockChains lazily by merging together high probability BB - /// sequences according to the "Algo2" in the paper mentioned at the top of - /// the file. To reduce malloc traffic, we allocate them using this slab-like - /// allocator, and destroy them after the pass completes. + /// We build BlockChains lazily while processing the loop structure of + /// a function. To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. 
SpecificBumpPtrAllocator<BlockChain> ChainAllocator; /// \brief Function wide BasicBlock to BlockChain mapping. diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d8c2f6a2ea..d4aede8a7e 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -60,7 +60,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( + FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); @@ -84,9 +84,13 @@ MachineFunction::~MachineFunction() { MFInfo->~MachineFunctionInfo(); Allocator.Deallocate(MFInfo); } - FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo); - ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - + + FrameInfo->~MachineFrameInfo(); + Allocator.Deallocate(FrameInfo); + + ConstantPool->~MachineConstantPool(); + Allocator.Deallocate(ConstantPool); + if (JumpTableInfo) { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); @@ -98,7 +102,7 @@ MachineFunction::~MachineFunction() { MachineJumpTableInfo 
*MachineFunction:: getOrCreateJumpTableInfo(unsigned EntryKind) { if (JumpTableInfo) return JumpTableInfo; - + JumpTableInfo = new (Allocator) MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); return JumpTableInfo; @@ -116,12 +120,12 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBI = begin(); else MBBI = MBB; - + // Figure out the block number this should have. unsigned BlockNo = 0; if (MBBI != begin()) BlockNo = prior(MBBI)->getNumber()+1; - + for (; MBBI != E; ++MBBI, ++BlockNo) { if (MBBI->getNumber() != (int)BlockNo) { // Remove use of the old number. @@ -130,7 +134,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { "MBB number mismatch!"); MBBNumbering[MBBI->getNumber()] = 0; } - + // If BlockNo is already taken, set that block's number to -1. if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); @@ -138,7 +142,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering[BlockNo] = MBBI; MBBI->setNumber(BlockNo); } - } + } // Okay, all the blocks are renumbered. If we have compactified the block // numbering, shrink MBBNumbering now. 
@@ -295,16 +299,16 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { // Print Frame Information FrameInfo->print(*this, OS); - + // Print JumpTable Information if (JumpTableInfo) JumpTableInfo->print(OS); // Print Constant Pool ConstantPool->print(OS); - + const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); - + if (RegInfo && !RegInfo->livein_empty()) { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator @@ -324,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } - + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; BB->print(OS, Indexes); @@ -411,10 +415,9 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { assert(JumpTableInfo && "No jump tables"); - assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : MAI.getPrivateGlobalPrefix(); SmallString<60> Name; @@ -691,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, const_cast<Constant*>(B), TD); - + return A == B; } @@ -714,7 +717,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, Constants[i].Alignment = Alignment; return i; } - + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); return Constants.size()-1; } @@ -723,7 +726,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; - + // Check to see if we already have this constant. 
// // FIXME, this could be made much more efficient for large constant pools. diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a35978b5cd..5d16c20460 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" @@ -33,7 +34,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 189cb2ba5d..9f3829e3c0 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -9,7 +9,7 @@ // // This file defines the MachineLoopInfo class that is used to identify natural // loops and determine the loop depth of various nodes of the CFG. Note that -// the loops identified may actually be several natural loops that share the +// the loops identified may actually be several natural loops that share the // same header node... not just a single natural loop. // //===----------------------------------------------------------------------===// @@ -17,17 +17,13 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Support/Debug.h" using namespace llvm; -namespace llvm { -#define MLB class LoopBase<MachineBasicBlock, MachineLoop> -TEMPLATE_INSTANTIATION(MLB); -#undef MLB -#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> -TEMPLATE_INSTANTIATION(MLIB); -#undef MLIB -} +// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. 
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; +template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; char MachineLoopInfo::ID = 0; INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", @@ -40,7 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Calculate(getAnalysis<MachineDominatorTree>().getBase()); // Update + LI.Analyze(getAnalysis<MachineDominatorTree>().getBase()); return false; } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 9c0d749a26..82e123528a 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -162,9 +162,22 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. def_iterator I = def_begin(Reg); + assert((I.atEnd() || llvm::next(I) == def_end()) && + "getVRegDef assumes a single definition or no definition"); return !I.atEnd() ? &*I : 0; } +/// getUniqueVRegDef - Return the unique machine instr that defines the +/// specified virtual register or null if none is found. If there are +/// multiple definitions or no definition, return null. 
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { + if (def_empty(Reg)) return 0; + def_iterator I = def_begin(Reg); + if (llvm::next(I) != def_end()) + return 0; + return &*I; +} + bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { use_iterator UI = use_begin(RegNo); if (UI == use_end()) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 070a55704d..acb1ee6cb6 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -241,30 +241,6 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { I->second = NewReg; } -/// MachinePHIiter - Iterator for PHI operands. This is used for the -/// PHI_iterator in the SSAUpdaterImpl template. -namespace { - class MachinePHIiter { - private: - MachineInstr *PHI; - unsigned idx; - - public: - explicit MachinePHIiter(MachineInstr *P) // begin iterator - : PHI(P), idx(1) {} - MachinePHIiter(MachineInstr *P, bool) // end iterator - : PHI(P), idx(PHI->getNumOperands()) {} - - MachinePHIiter &operator++() { idx += 2; return *this; } - bool operator==(const MachinePHIiter& x) const { return idx == x.idx; } - bool operator!=(const MachinePHIiter& x) const { return !operator==(x); } - unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } - MachineBasicBlock *getIncomingBlock() { - return PHI->getOperand(idx+1).getMBB(); - } - }; -} - /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. namespace llvm { @@ -279,7 +255,26 @@ public: static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } - typedef MachinePHIiter PHI_iterator; + /// Iterator for PHI operands. 
+ class PHI_iterator { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit PHI_iterator(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + PHI_iterator(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + PHI_iterator &operator++() { idx += 2; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } static inline PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 1783cbe21e..847bf1e76e 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -351,15 +351,21 @@ class ScheduleDAGMI : public ScheduleDAGInstrs { IntervalPressure BotPressure; RegPressureTracker BotRPTracker; +#ifndef NDEBUG /// The number of instructions scheduled so far. Used to cut off the /// scheduler at the point determined by misched-cutoff. unsigned NumInstrsScheduled; +#endif public: ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), - CurrentBottom(), BotRPTracker(BotPressure), NumInstrsScheduled(0) {} + CurrentBottom(), BotRPTracker(BotPressure) { +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + } ~ScheduleDAGMI() { delete SchedImpl; @@ -396,11 +402,17 @@ public: } /// getIssueWidth - Return the max instructions per scheduling group. - /// unsigned getIssueWidth() const { return InstrItins ? 
InstrItins->Props.IssueWidth : 1; } + /// getNumMicroOps - Return the number of issue slots required for this MI. + unsigned getNumMicroOps(MachineInstr *MI) const { + if (!InstrItins) return 1; + int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI); + } + protected: void initRegPressure(); void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); @@ -782,6 +794,8 @@ class ConvergingScheduler : public MachineSchedStrategy { /// current cycle in whichever direction at has moved, and maintains the state /// of "hazards" and other interlocks at the current cycle. struct SchedBoundary { + ScheduleDAGMI *DAG; + ReadyQueue Available; ReadyQueue Pending; bool CheckPending; @@ -800,7 +814,7 @@ class ConvergingScheduler : public MachineSchedStrategy { /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): - Available(ID, Name+".A"), + DAG(0), Available(ID, Name+".A"), Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0), MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} @@ -811,11 +825,13 @@ class ConvergingScheduler : public MachineSchedStrategy { return Available.getID() == ConvergingScheduler::TopQID; } + bool checkHazard(SUnit *SU); + void releaseNode(SUnit *SU, unsigned ReadyCycle); void bumpCycle(); - void bumpNode(SUnit *SU, unsigned IssueWidth); + void bumpNode(SUnit *SU); void releasePending(); @@ -868,6 +884,8 @@ protected: void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; TRI = DAG->TRI; + Top.DAG = dag; + Bot.DAG = dag; // Initialize the HazardRecognizers. const TargetMachine &TM = DAG->MF.getTarget(); @@ -917,6 +935,29 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { Bot.releaseNode(SU, SU->BotReadyCycle); } +/// Does this SU have a hazard within the current instruction group. 
+/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + return true; + + return false; +} + void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) @@ -924,9 +965,7 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle - || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU) - != ScheduleHazardRecognizer::NoHazard))) + if (ReadyCycle > CurrCycle || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -934,7 +973,8 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, /// Move the boundary of scheduled code by one cycle. void ConvergingScheduler::SchedBoundary::bumpCycle() { - IssueCount = 0; + unsigned Width = DAG->getIssueWidth(); + IssueCount = (IssueCount <= Width) ? 
0 : IssueCount - Width; assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); @@ -959,8 +999,7 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } /// Move the boundary of scheduled code by one SUnit. -void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU, - unsigned IssueWidth) { +void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -970,9 +1009,10 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU, } HazardRec->EmitInstruction(SU); } - // Check the instruction group size limit. - ++IssueCount; - if (IssueCount == IssueWidth) { + // Check the instruction group dispatch limit. + // TODO: Check if this SU must end a dispatch group. + IssueCount += DAG->getNumMicroOps(SU->getInstr()); + if (IssueCount >= DAG->getIssueWidth()) { DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); bumpCycle(); } @@ -997,8 +1037,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() { if (ReadyCycle > CurrCycle) continue; - if (HazardRec->isEnabled() - && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) + if (checkHazard(SU)) continue; Available.push(SU); @@ -1271,11 +1310,11 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = Top.CurrCycle; - Top.bumpNode(SU, DAG->getIssueWidth()); + Top.bumpNode(SU); } else { SU->BotReadyCycle = Bot.CurrCycle; - Bot.bumpNode(SU, DAG->getIssueWidth()); + Bot.bumpNode(SU); } } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 7c64fc65e6..45ce3ab28b 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -476,8 +476,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional 
fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().isBarrier() && - !TII->isPredicated(&MBB->back())) { + if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && + !TII->isPredicated(getBundleStart(&MBB->back()))) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -497,10 +497,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -520,10 +520,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().isBarrier()) { + } else if (getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -540,10 +540,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional 
branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -866,12 +866,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check LiveInts for a live range, but only for virtual registers. if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); + DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); *OS << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; @@ -1048,7 +1049,21 @@ void MachineVerifier::visitMachineFunctionAfter() { // Now check liveness info if available calcRegsRequired(); - if (MRI->isSSA() && !MF->empty()) { + // Check for killed virtual registers that should be live out. 
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + if (MInfo.regsKilled.count(*I)) { + report("Virtual register killed in block, but needed live out.", MFI); + *OS << "Virtual register " << PrintReg(*I) + << " is used after the block.\n"; + } + } + + if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; for (RegSet::iterator I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; @@ -1092,20 +1107,21 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); - for (LiveIntervals::const_iterator LVI = LiveInts->begin(), - LVE = LiveInts->end(); LVI != LVE; ++LVI) { - const LiveInterval &LI = *LVI->second; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. - if (MRI->reg_nodbg_empty(LI.reg)) + if (MRI->reg_nodbg_empty(Reg)) continue; - // Physical registers have much weirdness going on, mostly from coalescing. - // We should probably fix it, but for now just ignore them. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg)) + if (!LiveInts->hasInterval(Reg)) { + report("Missing live interval for virtual register", MF); + *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; + } - assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + const LiveInterval &LI = LiveInts->getInterval(Reg); + assert(Reg == LI.reg && "Invalid reg to interval mapping"); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I!=E; ++I) { @@ -1330,15 +1346,18 @@ void MachineVerifier::verifyLiveIntervals() { ++MFI; continue; } + + // Is VNI a PHI-def in the current block? 
+ bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + // Check that VNI is live-out of all predecessors. for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) - continue; - + // All predecessors must have a live-out value. if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() @@ -1347,12 +1366,14 @@ void MachineVerifier::verifyLiveIntervals() { continue; } - if (PVNI != VNI) { + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", *PI); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; + << '@' << LiveInts->getMBBStartIdx(MFI) << " in " + << PrintReg(Reg) << ": " << LI << '\n'; } } if (&*MFI == EndMBB) diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 0ed4c34bb1..b820578d6c 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -171,21 +171,28 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, return true; } +/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs. +/// This includes registers with no defs. +static bool isImplicitlyDefined(unsigned VirtReg, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg), + DE = MRI->def_end(); DI != DE; ++DI) + if (!DI->isImplicitDef()) + return false; + return true; +} + /// isSourceDefinedByImplicitDef - Return true if all sources of the phi node /// are implicit_def's. 
static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, const MachineRegisterInfo *MRI) { - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - unsigned SrcReg = MPhi->getOperand(i).getReg(); - const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (!DefMI || !DefMI->isImplicitDef()) + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI)) return false; - } return true; } - /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the @@ -287,7 +294,8 @@ void PHIElimination::LowerAtomicPHINode( for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); - + bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || + isImplicitlyDefined(SrcReg, MRI); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); @@ -295,14 +303,6 @@ void PHIElimination::LowerAtomicPHINode( // path the PHI. MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // If source is defined by an implicit def, there is no need to insert a - // copy. - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) { - ImpDefs.insert(DefMI); - continue; - } - // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. @@ -315,12 +315,27 @@ void PHIElimination::LowerAtomicPHINode( findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. 
- if (!reusedIncoming && IncomingReg) - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg); + if (!reusedIncoming && IncomingReg) { + if (SrcUndef) { + // The source register is undefined, so there is no need for a real + // COPY, but we still need to ensure joint dominance by defs. + // Insert an IMPLICIT_DEF instruction. + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); + + // Clean up the old implicit-def, if there even was one. + if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) + if (DefMI->isImplicitDef()) + ImpDefs.insert(DefMI); + } else { + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); + } + } // Now update live variable information if we have it. Otherwise we're done - if (!LV) continue; + if (SrcUndef || !LV) continue; // We want to be able to insert a kill of the register if this PHI (aka, the // copy we just inserted) is the last use of the source value. Live diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 8ee97e6c1b..9693780bda 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -89,10 +90,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. /// These should be converted to boolOrDefault in order to use applyOverride. 
-static AnalysisID applyDisable(AnalysisID ID, bool Override) { +static AnalysisID applyDisable(AnalysisID PassID, bool Override) { if (Override) - return &NoPassID; - return ID; + return 0; + return PassID; } /// Allow Pass selection to be overriden by command line options. This supports @@ -105,13 +106,13 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override, case cl::BOU_UNSET: return TargetID; case cl::BOU_TRUE: - if (TargetID != &NoPassID) + if (TargetID) return TargetID; - if (StandardID == &NoPassID) + if (StandardID == 0) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: - return &NoPassID; + return 0; } llvm_unreachable("Invalid command line option state"); } @@ -182,9 +183,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", "Target Pass Configuration", false, false) char TargetPassConfig::ID = 0; -static char NoPassIDAnchor = 0; -char &llvm::NoPassID = NoPassIDAnchor; - // Pseudo Pass IDs. char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0; @@ -197,8 +195,8 @@ public: // that are part of a standard pass pipeline without overridding the entire // pipeline. This mechanism allows target options to inherit a standard pass's // user interface. For example, a target may disable a standard pass by - // default by substituting NoPass, and the user may still enable that standard - // pass with an explicit command line option. + // default by substituting a pass ID of zero, and the user may still enable + // that standard pass with an explicit command line option. DenseMap<AnalysisID,AnalysisID> TargetPasses; /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass @@ -215,7 +213,8 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. 
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false), + : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0), + Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -226,18 +225,18 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) initializeCodeGen(*PassRegistry::getPassRegistry()); // Substitute Pseudo Pass IDs for real ones. - substitutePass(EarlyTailDuplicateID, TailDuplicateID); - substitutePass(PostRAMachineLICMID, MachineLICMID); + substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); + substitutePass(&PostRAMachineLICMID, &MachineLICMID); // Temporarily disable experimental passes. - substitutePass(MachineSchedulerID, NoPassID); + substitutePass(&MachineSchedulerID, 0); } /// Insert InsertedPassID pass after TargetPassID. -void TargetPassConfig::insertPass(const char &TargetPassID, - const char &InsertedPassID) { - assert(&TargetPassID != &InsertedPassID && "Insert a pass after itself!"); - std::pair<AnalysisID, AnalysisID> P(&TargetPassID, &InsertedPassID); +void TargetPassConfig::insertPass(AnalysisID TargetPassID, + AnalysisID InsertedPassID) { + assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); + std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID); Impl->InsertedPasses.push_back(P); } @@ -260,8 +259,9 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) { Opt = Val; } -void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) { - Impl->TargetPasses[&StandardID] = &TargetID; +void TargetPassConfig::substitutePass(AnalysisID StandardID, + AnalysisID TargetID) { + Impl->TargetPasses[StandardID] = TargetID; } AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { @@ -272,40 +272,62 @@ AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { return I->second; } -/// Add a CodeGen pass at 
this point in the pipeline after checking for target -/// and command line overrides. -AnalysisID TargetPassConfig::addPass(char &ID) { +/// Add a pass to the PassManager if that pass is supposed to be run. If the +/// Started/Stopped flags indicate either that the compilation should start at +/// a later pass or that it should stop after an earlier pass, then do not add +/// the pass. Finally, compare the current pass against the StartAfter +/// and StopAfter options and change the Started/Stopped flags accordingly. +void TargetPassConfig::addPass(Pass *P) { assert(!Initialized && "PassConfig is immutable"); - AnalysisID TargetID = getPassSubstitution(&ID); - AnalysisID FinalID = overridePass(&ID, TargetID); - if (FinalID == &NoPassID) + // Cache the Pass ID here in case the pass manager finds this pass is + // redundant with ones already scheduled / available, and deletes it. + // Fundamentally, once we add the pass to the manager, we no longer own it + // and shouldn't reference it. + AnalysisID PassID = P->getPassID(); + + if (Started && !Stopped) + PM->add(P); + if (StopAfter == PassID) + Stopped = true; + if (StartAfter == PassID) + Started = true; + if (Stopped && !Started) + report_fatal_error("Cannot stop compilation after pass that is not run"); +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { + AnalysisID TargetID = getPassSubstitution(PassID); + AnalysisID FinalID = overridePass(PassID, TargetID); + if (FinalID == 0) return FinalID; Pass *P = Pass::createPass(FinalID); if (!P) llvm_unreachable("Pass ID not registered"); - PM->add(P); + addPass(P); // Add the passes after the pass P if there is any. 
for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { - if ((*I).first == &ID) { + if ((*I).first == PassID) { assert((*I).second && "Illegal Pass ID!"); Pass *NP = Pass::createPass((*I).second); assert(NP && "Pass ID not registered"); - PM->add(NP); + addPass(NP); } } return FinalID; } -void TargetPassConfig::printAndVerify(const char *Banner) const { +void TargetPassConfig::printAndVerify(const char *Banner) { if (TM->shouldPrintMachineCode()) - PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); + addPass(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM->add(createMachineVerifierPass(Banner)); + addPass(createMachineVerifierPass(Banner)); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -315,46 +337,73 @@ void TargetPassConfig::addIRPasses() { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - PM->add(createTypeBasedAliasAnalysisPass()); - PM->add(createBasicAliasAnalysisPass()); + addPass(createTypeBasedAliasAnalysisPass()); + addPass(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) - PM->add(createVerifierPass()); + addPass(createVerifierPass()); // Run loop strength reduction before anything else. 
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - PM->add(createLoopStrengthReducePass(getTargetLowering())); + addPass(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) - PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } - PM->add(createGCLoweringPass()); + addPass(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. - PM->add(createUnreachableBlockEliminationPass()); + addPass(createUnreachableBlockEliminationPass()); +} + +/// Turn exception handling constructs into something the code generators can +/// handle. +void TargetPassConfig::addPassesToHandleExceptions() { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::Win64: + addPass(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + addPass(createLowerInvokePass(TM->getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + addPass(createUnreachableBlockEliminationPass()); + break; + } } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. 
void TargetPassConfig::addISelPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - PM->add(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(getTargetLowering())); - PM->add(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(getTargetLowering())); addPreISel(); if (PrintISelInput) - PM->add(createPrintFunctionPass("\n\n" + addPass(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", &dbgs())); // All passes which modify the LLVM IR are now complete; run the verifier // to ensure that the IR is valid. if (!DisableVerify) - PM->add(createVerifierPass()); + addPass(createVerifierPass()); } /// Add the complete set of target-independent postISel code generator passes. @@ -391,11 +440,11 @@ void TargetPassConfig::addMachinePasses() { assert (TPI && IPI && "Pass ID not registered!"); const char *TID = (char *)(TPI->getTypeInfo()); const char *IID = (char *)(IPI->getTypeInfo()); - insertPass(*TID, *IID); + insertPass(TID, IID); } // Expand pseudo-instructions emitted by ISel. - addPass(ExpandISelPseudosID); + addPass(&ExpandISelPseudosID); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -404,7 +453,7 @@ void TargetPassConfig::addMachinePasses() { else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID); } // Run pre-ra passes. @@ -423,7 +472,7 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After PostRegAlloc passes"); // Insert prolog/epilog code. Eliminate abstract frame index references... - addPass(PrologEpilogCodeInserterID); + addPass(&PrologEpilogCodeInserterID); printAndVerify("After PrologEpilogCodeInserter"); /// Add passes that optimize machine instructions after register allocation. 
@@ -431,7 +480,7 @@ void TargetPassConfig::addMachinePasses() { addMachineLateOptimization(); // Expand pseudo instructions before second scheduling pass. - addPass(ExpandPostRAPseudosID); + addPass(&ExpandPostRAPseudosID); printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. @@ -440,14 +489,14 @@ void TargetPassConfig::addMachinePasses() { // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { - addPass(PostRASchedulerID); + addPass(&PostRASchedulerID); printAndVerify("After PostRAScheduler"); } // GC - addPass(GCMachineCodeAnalysisID); + addPass(&GCMachineCodeAnalysisID); if (PrintGCInfo) - PM->add(createGCInfoPrinter(dbgs())); + addPass(createGCInfoPrinter(dbgs())); // Basic block placement. if (getOptLevel() != CodeGenOpt::None) @@ -460,30 +509,30 @@ void TargetPassConfig::addMachinePasses() { /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(EarlyTailDuplicateID) != &NoPassID) + if (addPass(&EarlyTailDuplicateID)) printAndVerify("After Pre-RegAlloc TailDuplicate"); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(OptimizePHIsID); + addPass(&OptimizePHIsID); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 
- addPass(DeadMachineInstructionElimID); + addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); - addPass(MachineLICMID); - addPass(MachineCSEID); - addPass(MachineSinkingID); + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&MachineSinkingID); printAndVerify("After Machine LICM, CSE and Sinking passes"); - addPass(PeepholeOptimizerID); + addPass(&PeepholeOptimizerID); printAndVerify("After codegen peephole optimization pass"); } @@ -561,10 +610,10 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - addPass(PHIEliminationID); - addPass(TwoAddressInstructionPassID); + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); - PM->add(RegAllocPass); + addPass(RegAllocPass); printAndVerify("After Register Allocation"); } @@ -572,45 +621,45 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&ProcessImplicitDefsID); + // LiveVariables currently requires pure SSA form. // // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). - addPass(LiveVariablesID); + addPass(&LiveVariablesID); // Add passes that move from transformed SSA into conventional SSA. This is a // "copy coalescing" problem. // if (!EnableStrongPHIElim) { // Edge splitting is smarter with machine loop info. 
- addPass(MachineLoopInfoID); - addPass(PHIEliminationID); + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); } - addPass(TwoAddressInstructionPassID); - - // FIXME: Either remove this pass completely, or fix it so that it works on - // SSA form. We could modify LiveIntervals to be independent of this pass, But - // it would be even better to simply eliminate *all* IMPLICIT_DEFs before - // leaving SSA. - addPass(ProcessImplicitDefsID); + addPass(&TwoAddressInstructionPassID); if (EnableStrongPHIElim) - addPass(StrongPHIEliminationID); + addPass(&StrongPHIEliminationID); - addPass(RegisterCoalescerID); + addPass(&RegisterCoalescerID); // PreRA instruction scheduling. - if (addPass(MachineSchedulerID) != &NoPassID) + if (addPass(&MachineSchedulerID)) printAndVerify("After Machine Scheduling"); // Add the selected register allocation pass. - PM->add(RegAllocPass); - printAndVerify("After Register Allocation"); + addPass(RegAllocPass); + printAndVerify("After Register Allocation, before rewriter"); + + // Allow targets to change the register assignments before rewriting. + if (addPreRewrite()) + printAndVerify("After pre-rewrite passes"); // Finally rewrite virtual registers. - addPass(VirtRegRewriterID); + addPass(&VirtRegRewriterID); printAndVerify("After Virtual Register Rewriter"); // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, @@ -625,12 +674,12 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. - addPass(StackSlotColoringID); + addPass(&StackSlotColoringID); // Run post-ra machine LICM to hoist reloads / remats. // // FIXME: can this move into MachineLateOptimization? 
- addPass(PostRAMachineLICMID); + addPass(&PostRAMachineLICMID); printAndVerify("After StackSlotColoring and postra Machine LICM"); } @@ -642,33 +691,33 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { // Branch folding must be run after regalloc and prolog/epilog insertion. - if (addPass(BranchFolderPassID) != &NoPassID) + if (addPass(&BranchFolderPassID)) printAndVerify("After BranchFolding"); // Tail duplication. - if (addPass(TailDuplicateID) != &NoPassID) + if (addPass(&TailDuplicateID)) printAndVerify("After TailDuplicate"); // Copy propagation. - if (addPass(MachineCopyPropagationID) != &NoPassID) + if (addPass(&MachineCopyPropagationID)) printAndVerify("After copy propagation pass"); } /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - AnalysisID ID = &NoPassID; + AnalysisID PassID = 0; if (!DisableBlockPlacement) { // MachineBlockPlacement is a new pass which subsumes the functionality of // CodPlacementOpt. The old code placement pass can be restored by // disabling block placement, but eventually it will be removed. - ID = addPass(MachineBlockPlacementID); + PassID = addPass(&MachineBlockPlacementID); } else { - ID = addPass(CodePlacementOptID); + PassID = addPass(&CodePlacementOptID); } - if (ID != &NoPassID) { + if (PassID) { // Run a separate pass to collect block placement statistics. 
if (EnableBlockPlacementStats) - addPass(MachineBlockPlacementStatsID); + addPass(&MachineBlockPlacementStatsID); printAndVerify("After machine block placement."); } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 81cf9011d1..91c33c4af4 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -145,8 +145,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; - MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); - if (++UI == MRI->use_nodbg_end()) + if (MRI->hasOneNonDBGUse(SrcReg)) // No other uses. return false; @@ -157,11 +156,19 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (!DstRC) return false; + // The ext instr may be operating on a sub-register of SrcReg as well. + // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit + // register. + // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of + // SrcReg:SubIdx should be replaced. + bool UseSrcSubIdx = TM->getRegisterInfo()-> + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + // The source has other uses. See if we can replace the other uses with use of // the result of the extension. 
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); @@ -172,8 +179,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallVector<MachineOperand*, 8> ExtendedUses; bool ExtendLife = true; - UI = MRI->use_nodbg_begin(SrcReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; @@ -185,6 +192,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + // It's an error to translate this: // // %reg1025 = <sext> %reg1024 @@ -239,9 +250,9 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. 
- UI = MRI->use_nodbg_begin(DstReg); for (MachineRegisterInfo::use_nodbg_iterator - UE = MRI->use_nodbg_end(); UI != UE; ++UI) + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); + UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); @@ -260,10 +271,14 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, } unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - + // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. + if (UseSrcSubIdx) { + Copy->getOperand(0).setSubReg(SubIdx); + Copy->getOperand(0).setIsUndef(); + } UseMO->setReg(NewVR); ++NumReuse; Changed = true; @@ -353,14 +368,15 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. - unsigned SrcReg; + unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; - if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. 
- if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { + if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { ++NumCmps; return true; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 7735fa2bb1..34d075c232 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -9,297 +9,163 @@ #define DEBUG_TYPE "processimplicitdefs" -#include "llvm/CodeGen/ProcessImplicitDefs.h" - -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" - using namespace llvm; +namespace { +/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def +/// for each use. Add isUndef marker to implicit_def defs and their uses. 
+class ProcessImplicitDefs : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + SmallSetVector<MachineInstr*, 16> WorkList; + + void processImplicitDef(MachineInstr *MI); + bool canTurnIntoImplicitDef(MachineInstr *MI); + +public: + static char ID; + + ProcessImplicitDefs() : MachineFunctionPass(ID) { + initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); +}; +} // end anonymous namespace + char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(TwoAddressInstructionPassID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } -bool -ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - SmallSet<unsigned, 8> &ImpDefRegs) { - switch(OpIdx) { - case 1: - return MI->isCopy() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - case 2: - return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - default: return false; - } -} - -static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - SmallSet<unsigned, 8> &ImpDefRegs) { - if (MI->isCopy()) { - MachineOperand &MO0 = MI->getOperand(0); - MachineOperand &MO1 = MI->getOperand(1); - if (MO1.getReg() != 
Reg) - return false; - if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg())) - return true; +bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { + if (!MI->isCopyLike() && + !MI->isInsertSubreg() && + !MI->isRegSequence() && + !MI->isPHI()) return false; - } - return false; + for (MIOperands MO(MI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse() && MO->readsReg()) + return false; + return true; } -/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure -/// there is one implicit_def for each use. Add isUndef marker to -/// implicit_def defs and their uses. -bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { - - DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " - << ((Value*)fn.getFunction())->getName() << '\n'); - - bool Changed = false; - - TII = fn.getTarget().getInstrInfo(); - TRI = fn.getTarget().getRegisterInfo(); - MRI = &fn.getRegInfo(); - LV = getAnalysisIfAvailable<LiveVariables>(); - - SmallSet<unsigned, 8> ImpDefRegs; - SmallVector<MachineInstr*, 8> ImpDefMIs; - SmallVector<MachineInstr*, 4> RUses; - SmallPtrSet<MachineBasicBlock*,16> Visited; - SmallPtrSet<MachineInstr*, 8> ModInsts; - - MachineBasicBlock *Entry = fn.begin(); - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MachineBasicBlock *MBB = *DFI; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ) { - MachineInstr *MI = &*I; - ++I; - if (MI->isImplicitDef()) { - ImpDefMIs.push_back(MI); - // Is this a sub-register read-modify-write? 
- if (MI->getOperand(0).readsReg()) - continue; - unsigned Reg = MI->getOperand(0).getReg(); - ImpDefRegs.insert(Reg); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); - } +void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { + DEBUG(dbgs() << "Processing " << *MI); + unsigned Reg = MI->getOperand(0).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // For virtual regiusters, mark all uses as <undef>, and convert users to + // implicit-def when possible. + for (MachineRegisterInfo::use_nodbg_iterator UI = + MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &MO = UI.getOperand(); + MO.setIsUndef(); + MachineInstr *UserMI = MO.getParent(); + if (!canTurnIntoImplicitDef(UserMI)) continue; - } - - // Eliminate %reg1032:sub<def> = COPY undef. - if (MI->isCopy() && MI->getOperand(0).readsReg()) { - MachineOperand &MO = MI->getOperand(1); - if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { - if (LV && MO.isKill()) { - LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); - vi.removeKill(MI); - } - unsigned Reg = MI->getOperand(0).getReg(); - MI->eraseFromParent(); - Changed = true; - - // A REG_SEQUENCE may have been expanded into partial definitions. - // If this was the last one, mark Reg as implicitly defined. - if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg)) - ImpDefRegs.insert(Reg); - continue; - } - } - - bool ChangedToImpDef = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.readsReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (!ImpDefRegs.count(Reg)) - continue; - // Use is a copy, just turn it into an implicit_def. 
- if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { - bool isKill = MO.isKill(); - MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); - if (isKill) { - ImpDefRegs.erase(Reg); - if (LV) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); - } - } - ChangedToImpDef = true; - Changed = true; - break; - } - - Changed = true; - MO.setIsUndef(); - // This is a partial register redef of an implicit def. - // Make sure the whole register is defined by the instruction. - if (MO.isDef()) { - MI->addRegisterDefined(Reg); - continue; - } - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other reads of Reg are also marked <undef>. - for (unsigned j = i+1; j != e; ++j) { - MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) - MOJ.setIsUndef(); - } - ImpDefRegs.erase(Reg); - } - } - - if (ChangedToImpDef) { - // Backtrack to process this new implicit_def. - --I; - } else { - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - ImpDefRegs.erase(MO.getReg()); - } - } + DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); + UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + WorkList.insert(UserMI); } + MI->eraseFromParent(); + return; + } - // Any outstanding liveout implicit_def's? - for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { - MachineInstr *MI = ImpDefMIs[i]; - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !ImpDefRegs.count(Reg)) { - // Delete all "local" implicit_def's. That include those which define - // physical registers since they cannot be liveout. - MI->eraseFromParent(); - Changed = true; + // This is a physreg implicit-def. + // Look for the first instruction to use or define an alias. 
+ MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); + bool Found = false; + for (++UserMI; UserMI != UserE; ++UserMI) { + for (MIOperands MO(UserMI); MO.isValid(); ++MO) { + if (!MO->isReg()) continue; - } - - // If there are multiple defs of the same register and at least one - // is not an implicit_def, do not insert implicit_def's before the - // uses. - bool Skip = false; - SmallVector<MachineInstr*, 4> DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), - DE = MRI->def_end(); DI != DE; ++DI) { - MachineInstr *DeadImpDef = &*DI; - if (!DeadImpDef->isImplicitDef()) { - Skip = true; - break; - } - DeadImpDefs.push_back(DeadImpDef); - } - if (Skip) + unsigned UserReg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + !TRI->regsOverlap(Reg, UserReg)) continue; + // UserMI uses or redefines Reg. Set <undef> flags on all uses. + Found = true; + if (MO->isUse()) + MO->setIsUndef(); + } + if (Found) + break; + } - // The only implicit_def which we want to keep are those that are live - // out of its block. - for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j) - DeadImpDefs[j]->eraseFromParent(); - Changed = true; - - // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - if (UI.getOperand().isUndef()) - continue; - MachineInstr *RMI = &*UI; - if (ModInsts.insert(RMI)) - RUses.push_back(RMI); - } + // If we found the using MI, we can erase the IMPLICIT_DEF. + if (Found) { + DEBUG(dbgs() << "Physreg user: " << *UserMI); + MI->eraseFromParent(); + return; + } - for (unsigned i = 0, e = RUses.size(); i != e; ++i) { - MachineInstr *RMI = RUses[i]; + // Using instr wasn't found, it could be in another block. + // Leave the physreg IMPLICIT_DEF, but trim any extra operands. 
+ for (unsigned i = MI->getNumOperands() - 1; i; --i) + MI->RemoveOperand(i); + DEBUG(dbgs() << "Keeping physreg: " << *MI); +} - // Turn a copy use into an implicit_def. - if (isUndefCopy(RMI, Reg, ImpDefRegs)) { - RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); +/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into +/// <undef> operands. +bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { - bool isKill = false; - SmallVector<unsigned, 4> Ops; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - Ops.push_back(j); - if (RRMO.isKill()) - isKill = true; - } - } - // Leave the other operands along. - for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) { - unsigned OpIdx = Ops[j]; - RMI->RemoveOperand(OpIdx-j); - } + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); - // Update LiveVariables varinfo if the instruction is a kill. - if (LV && isKill) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(RMI); - } - continue; - } + bool Changed = false; - // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = MRI->getRegClass(Reg); - unsigned NewVReg = MRI->createVirtualRegister(RC); - bool isKill = true; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - RRMO.setReg(NewVReg); - RRMO.setIsUndef(); - if (isKill) { - // Only the first operand of NewVReg is marked kill. 
- RRMO.setIsKill(); - isKill = false; - } - } - } - } - RUses.clear(); - ModInsts.clear(); - } - ImpDefRegs.clear(); - ImpDefMIs.clear(); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); + assert(WorkList.empty() && "Inconsistent worklist state"); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; ++MFI) { + // Scan the basic block for implicit defs. + for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) + if (MBBI->isImplicitDef()) + WorkList.insert(MBBI); + + if (WorkList.empty()) + continue; + + DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + << " implicit defs.\n"); + Changed = true; + + // Drain the WorkList to recursively process any new implicit defs. + do processImplicitDef(WorkList.pop_back_val()); + while (!WorkList.empty()); } - return Changed; } - diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index c542504722..993dbc71de 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" +#include "LiveRegMatrix.h" #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" @@ -34,8 +35,6 @@ using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside @@ -47,69 +46,20 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), const char *RegAllocBase::TimerGroupName = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; -#ifndef NDEBUG -// Verify each LiveIntervalUnion. 
-void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr<LiveVirtRegBitSet> - unionVRegs(new LiveVirtRegBitSet[TRI->getNumRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0, NumRegs = TRI->getNumRegs(); PhysReg != NumRegs; - ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - LiveInterval* li = liItr->second; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - if (li->empty()) continue; // unionVRegs will only be filled if li is - // non-empty - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << PrintReg(reg, TRI) << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. 
-} -#endif //!NDEBUG - //===----------------------------------------------------------------------===// // RegAllocBase Implementation //===----------------------------------------------------------------------===// -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); +void RegAllocBase::init(VirtRegMap &vrm, + LiveIntervals &lis, + LiveRegMatrix &mat) { TRI = &vrm.getTargetRegInfo(); MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; + Matrix = &mat; MRI->freezeReservedRegs(vrm.getMachineFunction()); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.size()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[NumRegs]); - } -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.size(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical @@ -117,35 +67,14 @@ void RegAllocBase::releaseMemory() { // them on the priority queue for later assignment. 
void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + enqueue(&LIS->getInterval(Reg)); } } -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - // Top-level driver to manage the queue of unassigned VirtRegs and call the // selectOrSplit implementation. void RegAllocBase::allocatePhysRegs() { @@ -163,7 +92,7 @@ void RegAllocBase::allocatePhysRegs() { } // Invalidate all interference queries, live ranges could have changed. 
- invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that @@ -195,7 +124,7 @@ void RegAllocBase::allocatePhysRegs() { } if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); + Matrix->assign(*VirtReg, AvailablePhysReg); for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { @@ -214,14 +143,3 @@ void RegAllocBase::allocatePhysRegs() { } } } - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - if (query(VirtReg, *AI).checkInterference()) - return *AI; - return 0; -} diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index cc13e75947..db0c8e13d3 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -47,6 +47,7 @@ template<typename T> class SmallVectorImpl; class TargetRegisterInfo; class VirtRegMap; class LiveIntervals; +class LiveRegMatrix; class Spiller; /// RegAllocBase provides the register allocation driver and interface that can @@ -56,49 +57,20 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. class RegAllocBase { - LiveIntervalUnion::Allocator UnionAllocator; - - // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual - // registers may have changed. - unsigned UserTag; - - LiveIntervalUnion::Array PhysReg2LiveUnion; - - // Current queries, one per physreg. They must be reinitialized each time we - // query on a new live virtual register. 
- OwningArrayPtr<LiveIntervalUnion::Query> Queries; - protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; VirtRegMap *VRM; LiveIntervals *LIS; + LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {} virtual ~RegAllocBase() {} // A RegAlloc pass should call this before allocatePhysRegs. - void init(VirtRegMap &vrm, LiveIntervals &lis); - - // Get an initialized query to check interferences between lvr and preg. Note - // that Query::init must be called at least once for each physical register - // before querying a new live virtual register. This ties Queries and - // PhysReg2LiveUnion together. - LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); - return Queries[PhysReg]; - } - - // Get direct access to the underlying LiveIntervalUnion for PhysReg. - LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { - return PhysReg2LiveUnion[PhysReg]; - } - - // Invalidate all cached information about virtual registers - live ranges may - // have changed. - void invalidateVirtRegs() { ++UserTag; } + void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat); // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. @@ -120,28 +92,6 @@ protected: virtual unsigned selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; - // A RegAlloc pass should call this when PassManager releases its memory. - virtual void releaseMemory(); - - // Helper for checking interference between a live virtual register and a - // physical register, including all its register aliases. If an interference - // exists, return the interfering register, which may be preg or an alias. 
- unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); - - /// assign - Assign VirtReg to PhysReg. - /// This should not be called from selectOrSplit for the current register. - void assign(LiveInterval &VirtReg, unsigned PhysReg); - - /// unassign - Undo a previous assignment of VirtReg to PhysReg. - /// This can be invoked from selectOrSplit, but be careful to guarantee that - /// allocation is making progress. - void unassign(LiveInterval &VirtReg, unsigned PhysReg); - -#ifndef NDEBUG - // Verify each LiveIntervalUnion. - void verify(); -#endif - // Use this group name for NamedRegionTimer. static const char *TimerGroupName; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6d0648043a..3a03807ebd 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,11 +13,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "AllocationOrder.h" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" @@ -64,9 +65,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // context MachineFunction *MF; - // analyses - RenderMachineFunction *RMF; - // state std::auto_ptr<Spiller> SpillerInstance; std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, @@ -117,9 +115,6 @@ public: bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs); - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs); - static char ID; }; @@ -138,7 +133,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); 
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -159,41 +154,15 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); - DEBUG(AU.addRequired<RenderMachineFunction>()); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); MachineFunctionPass::getAnalysisUsage(AU); } void RABasic::releaseMemory() { SpillerInstance.reset(0); - RegAllocBase::releaseMemory(); } -// Helper for spillInterferences() that spills all interfering vregs currently -// assigned to this physical register. -void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl<LiveInterval*> &SplitVRegs) { - LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); - assert(Q.seenAllInterferences() && "need collectInterferences()"); - const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); - - for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(), - E = PendingSpills.end(); I != E; ++I) { - LiveInterval &SpilledVReg = **I; - DEBUG(dbgs() << "extracting from " << - TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); - - // Deallocate the interfering vreg by removing it from the union. - // A LiveInterval instance may not be in a union during modification! - unassign(SpilledVReg, PhysReg); - - // Spill the extracted interval. - LiveRangeEdit LRE(&SpilledVReg, SplitVRegs, *MF, *LIS, VRM); - spiller().spill(LRE); - } - // After extracting segments, the query's results are invalid. But keep the - // contents valid until we're done accessing pendingSpills. 
- Q.clear(); -} // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are @@ -202,22 +171,41 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. - unsigned NumInterferences = 0; + SmallVector<LiveInterval*, 8> Intfs; + // Collect interferences assigned to any alias of the physical register. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { - LiveIntervalUnion::Query &QAlias = query(VirtReg, *AI); - NumInterferences += QAlias.collectInterferingVRegs(); - if (QAlias.seenUnspillableVReg()) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) return false; + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + return false; + Intfs.push_back(Intf); } } DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << " interferences with " << VirtReg << "\n"); - assert(NumInterferences > 0 && "expect interference"); + assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) - spillReg(VirtReg, *AI, SplitVRegs); + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval &Spill = *Intfs[i]; + + // Skip duplicates. + if (!VRM->hasPhys(Spill.reg)) + continue; + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + Matrix->unassign(Spill); + + // Spill the extracted interval. 
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); + spiller().spill(LRE); + } return true; } @@ -235,49 +223,36 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &SplitVRegs) { - // Check for register mask interference. When live ranges cross calls, the - // set of usable registers is reduced to the callee-saved ones. - bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); - // Populate a list of physical register spill candidates. SmallVector<unsigned, 8> PhysRegSpillCands; // Check for an available register in this class. - ArrayRef<unsigned> Order = - RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg)); - for (ArrayRef<unsigned>::iterator I = Order.begin(), E = Order.end(); I != E; - ++I) { - unsigned PhysReg = *I; - - // If PhysReg is clobbered by a register mask, it isn't useful for - // allocation or spilling. - if (CrossRegMasks && !UsableRegs.test(PhysReg)) - continue; - - // Check interference and as a side effect, intialize queries for this - // VirtReg and its aliases. - unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); - if (interfReg == 0) { - // Found an available register. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + while (unsigned PhysReg = Order.next()) { + // Check for interference in PhysReg + switch (Matrix->checkInterference(VirtReg, PhysReg)) { + case LiveRegMatrix::IK_Free: + // PhysReg is available, allocate it. return PhysReg; - } - LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); - IntfQ.collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); - // The current VirtReg must either be spillable, or one of its interferences - // must have less spill weight. 
- if (interferingVirtReg->weight < VirtReg.weight ) { + case LiveRegMatrix::IK_VirtReg: + // Only virtual registers in the way, we may be able to spill them. PhysRegSpillCands.push_back(PhysReg); + continue; + + default: + // RegMask or RegUnit interference. + continue; } } + // Try to spill another interfering reg with less spill weight. for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { - - if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) + continue; - assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + assert(!Matrix->checkInterference(VirtReg, *PhysRegI) && "Interference after spill."); // Tell the caller to allocate to this newly freed physical register. return *PhysRegI; @@ -301,9 +276,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { << ((Value*)mf.getFunction())->getName() << '\n'); MF = &mf; - DEBUG(RMF = &getAnalysis<RenderMachineFunction>()); - - RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); @@ -311,29 +286,6 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Diagnostic output before rewriting DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); - // optional HTML output - DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); - - // FIXME: Verification currently must run before VirtRegRewriter. We should - // make the rewriter a separate pass and override verifyAnalysis instead. When - // that happens, verification naturally falls under VerifyMachineCode. 
-#ifndef NDEBUG - if (VerifyEnabled) { - // Verify accuracy of LiveIntervals. The standard machine code verifier - // ensures that each LiveIntervals covers all uses of the virtual reg. - - // FIXME: MachineVerifier is badly broken when using the standard - // spiller. Always use -spiller=inline with -verify-regalloc. Even with the - // inline spiller, some tests fail to verify because the coalescer does not - // always generate verifiable code. - MF->verify(this, "In RABasic::verify"); - - // Verify that LiveIntervals are partitioned into unions and disjoint within - // the unions. - verify(); - } -#endif // !NDEBUG - releaseMemory(); return true; } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 46a8247701..6ac5428605 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,6 +16,7 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" +#include "LiveRegMatrix.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -167,19 +168,6 @@ class RAGreedy : public MachineFunctionPass, } }; - // Register mask interference. The current VirtReg is checked for register - // mask interference on entry to selectOrSplit(). If there is no - // interference, UsableRegs is left empty. If there is interference, - // UsableRegs has a bit mask of registers that can be used without register - // mask interference. - BitVector UsableRegs; - - /// clobberedByRegMask - Returns true if PhysReg is not directly usable - /// because of register mask clobbers. - bool clobberedByRegMask(unsigned PhysReg) const { - return !UsableRegs.empty() && !UsableRegs.test(PhysReg); - } - // splitting state. 
std::auto_ptr<SplitAnalysis> SA; std::auto_ptr<SplitEditor> SE; @@ -328,6 +316,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } @@ -351,6 +340,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); AU.addRequired<EdgeBundles>(); AU.addRequired<SpillPlacement>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -362,8 +353,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { //===----------------------------------------------------------------------===// bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { - if (unsigned PhysReg = VRM->getPhys(VirtReg)) { - unassign(LIS->getInterval(VirtReg), PhysReg); + if (VRM->hasPhys(VirtReg)) { + Matrix->unassign(LIS->getInterval(VirtReg)); return true; } // Unassigned virtreg is probably in the priority queue. @@ -372,13 +363,12 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (!PhysReg) + if (!VRM->hasPhys(VirtReg)) return; // Register is assigned, put it back on the queue for reassignment. 
LiveInterval &LI = LIS->getInterval(VirtReg); - unassign(LI, PhysReg); + Matrix->unassign(LI); enqueue(&LI); } @@ -400,7 +390,6 @@ void RAGreedy::releaseMemory() { SpillerInstance.reset(0); ExtraRegInfo.clear(); GlobalCand.clear(); - RegAllocBase::releaseMemory(); } void RAGreedy::enqueue(LiveInterval *LI) { @@ -452,12 +441,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) { - if (clobberedByRegMask(PhysReg)) - continue; - if (!checkPhysRegInterference(VirtReg, PhysReg)) + while ((PhysReg = Order.next())) + if (!Matrix->checkInterference(VirtReg, PhysReg)) break; - } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -466,7 +452,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { + if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -529,6 +515,10 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @returns True when interference can be evicted cheaper than MaxCost. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, bool IsHint, EvictionCost &MaxCost) { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. 
This prevents @@ -541,8 +531,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) return false; @@ -550,8 +540,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) - return false; + assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + "Only expecting virtual register interference from query"); // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; @@ -605,19 +595,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + + // Collect all interfering virtregs first. 
+ SmallVector<LiveInterval*, 8> Intfs; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || - VirtReg.isSpillable() < Intf->isSpillable()) && - "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; - ++NumEvicted; - NewVRegs.push_back(Intf); - } + ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); + Intfs.append(IVR.begin(), IVR.end()); + } + + // Evict them second. This will invalidate the queries. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval *Intf = Intfs[i]; + // The same VirtReg may be present in multiple RegUnits. Skip duplicates. + if (!VRM->hasPhys(Intf->reg)) + continue; + Matrix->unassign(*Intf); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); } } @@ -644,8 +644,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (clobberedByRegMask(PhysReg)) - continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1358,9 +1356,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. 
- for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { - if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) - .checkInterference()) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) + .checkInterference()) continue; // We know that VirtReg is a continuous interval from FirstInstr to @@ -1370,7 +1368,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1390,6 +1389,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; } } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveInterval &LI = LIS->getRegUnit(*Units); + LiveInterval::const_iterator I = LI.find(StartIdx); + LiveInterval::const_iterator E = LI.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = HUGE_VALF; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } } /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only @@ -1422,7 +1445,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // If VirtReg is live across any register mask operands, compute a list of // gaps with register masks. 
SmallVector<unsigned, 8> RegMaskGaps; - if (!UsableRegs.empty()) { + if (Matrix->checkRegMaskInterference(VirtReg)) { // Get regmask slots for the whole block. ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); DEBUG(dbgs() << RMS.size() << " regmasks in block:"); @@ -1484,7 +1507,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, calcGapWeights(PhysReg, GapWeight); // Remove any gaps with regmask clobbers. - if (clobberedByRegMask(PhysReg)) + if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) GapWeight[RegMaskGaps[i]] = HUGE_VALF; @@ -1644,7 +1667,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // an assertion when the coalescer is fixed. if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } @@ -1669,11 +1692,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { - // Check if VirtReg is live across any calls. - UsableRegs.clear(); - if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) - DEBUG(dbgs() << "Live across regmasks.\n"); - // First try assigning a free register. 
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1735,7 +1753,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); - RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); @@ -1749,7 +1769,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 2c58d7dda2..d0db26b208 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,6 @@ #define DEBUG_TYPE "regalloc" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" @@ -98,7 +97,6 @@ public: initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } /// Return the pass name. 
@@ -134,7 +132,6 @@ private: const TargetInstrInfo *tii; const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - RenderMachineFunction *rmf; std::auto_ptr<Spiller> spiller; LiveIntervals *lis; @@ -196,7 +193,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); + LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -205,12 +202,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, RegSet pregs; // Collect the set of preg intervals, record that they're used in the MF. - for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { - pregs.insert(itr->first); - mri->setPhysRegUsed(itr->first); - } + for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { + if (mri->def_empty(Reg)) + continue; + pregs.insert(Reg); + mri->setPhysRegUsed(Reg); } BitVector reservedRegs = tri->getReservedRegs(*mf); @@ -220,7 +216,11 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; const TargetRegisterClass *trc = mri->getRegClass(vreg); - const LiveInterval *vregLI = &lis->getInterval(vreg); + LiveInterval *vregLI = &LIS->getInterval(vreg); + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); // Compute an initial allowed set for the current vreg. 
typedef std::vector<unsigned> VRAllowed; @@ -228,76 +228,26 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (!reservedRegs.test(preg)) { - vrAllowed.push_back(preg); - } - } - - RegSet overlappingPRegs; - - // Record physical registers whose ranges overlap. - for (RegSet::const_iterator pregItr = pregs.begin(), - pregEnd = pregs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - const LiveInterval *pregLI = &lis->getInterval(preg); - - if (pregLI->empty()) { + if (reservedRegs.test(preg)) continue; - } - if (vregLI->overlaps(*pregLI)) - overlappingPRegs.insert(preg); - } + // vregLI crosses a regmask operand that clobbers preg. + if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) + continue; - // Record any overlaps with regmask operands. - BitVector regMaskOverlaps(tri->getNumRegs()); - for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), - rmEnd = regMaskSlots.end(); - rmItr != rmEnd; ++rmItr) { - SlotIndex rmIdx = *rmItr; - if (vregLI->liveAt(rmIdx)) { - MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); - const uint32_t* regMask = 0; - for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), - mopEnd = rmMI->operands_end(); - mopItr != mopEnd; ++mopItr) { - if (mopItr->isRegMask()) { - regMask = mopItr->getRegMask(); - break; - } + // vregLI overlaps fixed regunit interference. 
+ bool Interference = false; + for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { + if (vregLI->overlaps(LIS->getRegUnit(*Units))) { + Interference = true; + break; } - assert(regMask != 0 && "Couldn't find register mask."); - regMaskOverlaps.setBitsNotInMask(regMask); } - } + if (Interference) + continue; - for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { - if (regMaskOverlaps.test(preg)) - overlappingPRegs.insert(preg); - } - - for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), - pregEnd = overlappingPRegs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - - // Remove the register from the allowed set. - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), preg); - - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - - // Also remove any aliases. - for (MCRegAliasIterator AI(preg, tri, false); AI.isValid(); ++AI) { - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), *AI); - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - } + // preg is usable for this virtual register. + vrAllowed.push_back(preg); } // Construct the node. @@ -494,21 +444,17 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); au.addRequired<VirtRegMap>(); - au.addRequired<RenderMachineFunction>(); MachineFunctionPass::getAnalysisUsage(au); } void RegAllocPBQP::findVRegIntervalsToAlloc() { // Iterate over all live ranges. - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - - // Ignore physical ones. 
- if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (mri->reg_nodbg_empty(Reg)) continue; - - LiveInterval *li = itr->second; + LiveInterval *li = &lis->getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. // Empty intervals we allocate in a simple post-processing stage in @@ -576,9 +522,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, void RegAllocPBQP::finalizeAlloc() const { - typedef LiveIntervals::iterator LIIterator; - typedef LiveInterval::Ranges::const_iterator LRIterator; - // First allocate registers for the empty intervals. for (RegSet::const_iterator itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); @@ -607,7 +550,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); - rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -671,9 +613,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. finalizeAlloc(); - - rmf->renderMachineFunction("After PBQP register allocation.", vrm); - vregsToAlloc.clear(); emptyIntervalVRegs.clear(); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 619a1e5a6d..733312fbd0 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -398,11 +398,6 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, assert(!CP.isPartial() && "This doesn't work for partial copies."); assert(!CP.isPhys() && "This doesn't work for physreg copies."); - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. 
- if (!LIS->hasInterval(CP.getDstReg())) - return false; - LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = @@ -464,19 +459,6 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // two value numbers. IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (MCSubRegIterator SR(IntB.reg, TRI); SR.isValid(); ++SR) { - if (!LIS->hasInterval(*SR)) - continue; - LiveInterval &SRLI = LIS->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, - LIS->getVNInfoAllocator()))); - } - } - // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { // If B1 is killed by a PHI, then the merged live range must also be killed @@ -558,10 +540,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { assert (!CP.isPhys()); - // Bail if there is no dst interval. 
- if (!LIS->hasInterval(CP.getDstReg())) - return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = @@ -797,12 +775,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { - unsigned reg = NewMIImplDefs[i]; - LiveInterval &li = LIS->getInterval(reg); - VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), - LIS->getVNInfoAllocator()); - LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); - li.addRange(lr); + unsigned Reg = NewMIImplDefs[i]; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) + LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } CopyMI->eraseFromParent(); @@ -870,7 +846,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - LiveInterval &DstInt = LIS->getInterval(DstReg); + LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); // Update LiveDebugVariables. LDV->renameRegister(SrcReg, DstReg, SubIdx); @@ -883,8 +859,8 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // If SrcReg wasn't read, it may still be the case that DstReg is live-in // because SrcReg is a sub-register. - if (!Reads && SubIdx) - Reads = DstInt.liveAt(LIS->getInstructionIndex(UseMI)); + if (DstInt && !Reads && SubIdx) + Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { @@ -1077,8 +1053,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Update regalloc hint. TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); - DEBUG(dbgs() << "\tJoined. 
Result = " << PrintReg(CP.getDstReg(), TRI) - << ' ' << LIS->getInterval(CP.getDstReg()) << '\n'); + DEBUG({ + dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); + if (!CP.isPhys()) + dbgs() << LIS->getInterval(CP.getDstReg()); + dbgs() << '\n'; + }); ++numJoins; return true; @@ -1102,18 +1082,12 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // Deny any overlapping intervals. This depends on all the reserved // register live ranges to look like dead defs. - for (MCRegAliasIterator AS(CP.getDstReg(), TRI, true); AS.isValid(); ++AS) { - if (!LIS->hasInterval(*AS)) { - // Make sure at least DstReg itself exists before attempting a join. - if (*AS == CP.getDstReg()) - LIS->getOrCreateInterval(CP.getDstReg()); - continue; - } - if (RHS.overlaps(LIS->getInterval(*AS))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); + for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); return false; } - } + // Skip any value computations, we are not adding new values to the // reserved register. 
Also skip merging the live ranges, the reserved // register live range doesn't need to be accurate as long as all the @@ -1198,51 +1172,51 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || CP.isPartial() || CP.isPhys()) return false; - unsigned Dst = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(Src) || - !TargetRegisterInfo::isVirtualRegister(Dst)) + unsigned A = CP.getDstReg(); + if (!TargetRegisterInfo::isVirtualRegister(A)) return false; - unsigned A = CP.getDstReg(); unsigned B = CP.getSrcReg(); - - if (B == Dst) - std::swap(A, B); - assert(Dst == A); - - const MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); - - if (!OtherMI || !OtherMI->isFullCopy()) + if (!TargetRegisterInfo::isVirtualRegister(B)) return false; - unsigned OtherDst = OtherMI->getOperand(0).getReg(); - unsigned OtherSrc = OtherMI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || - !TargetRegisterInfo::isVirtualRegister(OtherDst)) + MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); + if (!OtherMI) return false; - assert(OtherDst == B); - - if (Src != OtherSrc) - return false; + if (MI->isImplicitDef()) { + DupCopies.push_back(MI); + return true; + } else { + if (!MI->isFullCopy()) + return false; + unsigned Src = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Src)) + return false; + if (!OtherMI->isFullCopy()) + return false; + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) + return false; - // If the copies use two different value numbers of X, we cannot merge - // A and B. - LiveInterval &SrcInt = li.getInterval(Src); - // getVNInfoBefore returns NULL for undef copies. 
In this case, the - // optimization is still safe. - if (SrcInt.getVNInfoBefore(OtherVNI->def) != SrcInt.getVNInfoBefore(VNI->def)) - return false; + if (Src != OtherSrc) + return false; - DupCopies.push_back(MI); + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + // getVNInfoBefore returns NULL for undef copies. In this case, the + // optimization is still safe. + if (SrcInt.getVNInfoBefore(OtherVNI->def) != + SrcInt.getVNInfoBefore(VNI->def)) + return false; - return true; + DupCopies.push_back(MI); + return true; + } } /// joinIntervals - Attempt to join these two intervals. On failure, this @@ -1280,7 +1254,7 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the RHS. @@ -1309,7 +1283,7 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the LHS. @@ -1455,14 +1429,17 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { if (!ErasedInstrs.insert(MI)) continue; - // We have pretended that the assignment to B in + // If MI is a copy, then we have pretended that the assignment to B in // A = X // B = X // was actually a copy from A. Now that we decided to coalesce A and B, // transform the code into // A = X - unsigned Src = MI->getOperand(1).getReg(); - SourceRegisters.push_back(Src); + // In the case of the implicit_def, we just have to remove it. 
+ if (!MI->isImplicitDef()) { + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + } LIS->RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp deleted file mode 100644 index 6020908d91..0000000000 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "rendermf" - -#include "RenderMachineFunction.h" - -#include "VirtRegMap.h" - -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" - -#include <sstream> - -using namespace llvm; - -char RenderMachineFunction::ID = 0; -INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) - -static cl::opt<std::string> -outputFileSuffix("rmf-file-suffix", - cl::desc("Appended to function name to get output file name " - "(default: \".html\")"), - cl::init(".html"), cl::Hidden); - -static cl::opt<std::string> -machineFuncsToRender("rmf-funcs", - cl::desc("Comma separated list of functions to 
render" - ", or \"*\"."), - cl::init(""), cl::Hidden); - -static cl::opt<std::string> -pressureClasses("rmf-classes", - cl::desc("Register classes to render pressure for."), - cl::init(""), cl::Hidden); - -static cl::opt<std::string> -showIntervals("rmf-intervals", - cl::desc("Live intervals to show alongside code."), - cl::init(""), cl::Hidden); - -static cl::opt<bool> -filterEmpty("rmf-filter-empty-intervals", - cl::desc("Don't display empty intervals."), - cl::init(true), cl::Hidden); - -static cl::opt<bool> -showEmptyIndexes("rmf-empty-indexes", - cl::desc("Render indexes not associated with instructions or " - "MBB starts."), - cl::init(false), cl::Hidden); - -static cl::opt<bool> -useFancyVerticals("rmf-fancy-verts", - cl::desc("Use SVG for vertical text."), - cl::init(true), cl::Hidden); - -static cl::opt<bool> -prettyHTML("rmf-pretty-html", - cl::desc("Pretty print HTML. For debugging the renderer only.."), - cl::init(false), cl::Hidden); - - -namespace llvm { - - bool MFRenderingOptions::renderingOptionsProcessed; - std::set<std::string> MFRenderingOptions::mfNamesToRender; - bool MFRenderingOptions::renderAllMFs = false; - - std::set<std::string> MFRenderingOptions::classNamesToRender; - bool MFRenderingOptions::renderAllClasses = false; - - std::set<std::pair<unsigned, unsigned> > - MFRenderingOptions::intervalNumsToRender; - unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; - - template <typename OutputItr> - void MFRenderingOptions::splitComaSeperatedList(const std::string &s, - OutputItr outItr) { - std::string::const_iterator curPos = s.begin(); - std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); - while (nextComa != s.end()) { - std::string elem; - std::copy(curPos, nextComa, std::back_inserter(elem)); - *outItr = elem; - ++outItr; - curPos = llvm::next(nextComa); - nextComa = std::find(curPos, s.end(), ','); - } - - if (curPos != s.end()) { - std::string elem; - std::copy(curPos, s.end(), 
std::back_inserter(elem)); - *outItr = elem; - ++outItr; - } - } - - void MFRenderingOptions::processOptions() { - if (!renderingOptionsProcessed) { - processFuncNames(); - processRegClassNames(); - processIntervalNumbers(); - renderingOptionsProcessed = true; - } - } - - void MFRenderingOptions::processFuncNames() { - if (machineFuncsToRender == "*") { - renderAllMFs = true; - } else { - splitComaSeperatedList(machineFuncsToRender, - std::inserter(mfNamesToRender, - mfNamesToRender.begin())); - } - } - - void MFRenderingOptions::processRegClassNames() { - if (pressureClasses == "*") { - renderAllClasses = true; - } else { - splitComaSeperatedList(pressureClasses, - std::inserter(classNamesToRender, - classNamesToRender.begin())); - } - } - - void MFRenderingOptions::processIntervalNumbers() { - std::set<std::string> intervalRanges; - splitComaSeperatedList(showIntervals, - std::inserter(intervalRanges, - intervalRanges.begin())); - std::for_each(intervalRanges.begin(), intervalRanges.end(), - processIntervalRange); - } - - void MFRenderingOptions::processIntervalRange( - const std::string &intervalRangeStr) { - if (intervalRangeStr == "*") { - intervalTypesToRender |= All; - } else if (intervalRangeStr == "virt-nospills*") { - intervalTypesToRender |= VirtNoSpills; - } else if (intervalRangeStr == "spills*") { - intervalTypesToRender |= VirtSpills; - } else if (intervalRangeStr == "virt*") { - intervalTypesToRender |= AllVirt; - } else if (intervalRangeStr == "phys*") { - intervalTypesToRender |= AllPhys; - } else { - std::istringstream iss(intervalRangeStr); - unsigned reg1, reg2; - if ((iss >> reg1 >> std::ws)) { - if (iss.eof()) { - intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); - } else { - char c; - iss >> c; - if (c == '-' && (iss >> reg2)) { - intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); - } else { - dbgs() << "Warning: Invalid interval range \"" - << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; - } - } - } else { - dbgs() << "Warning: Invalid interval number \"" - << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; - } - } - } - - void MFRenderingOptions::setup(MachineFunction *mf, - const TargetRegisterInfo *tri, - LiveIntervals *lis, - const RenderMachineFunction *rmf) { - this->mf = mf; - this->tri = tri; - this->lis = lis; - this->rmf = rmf; - - clear(); - } - - void MFRenderingOptions::clear() { - regClassesTranslatedToCurrentFunction = false; - regClassSet.clear(); - - intervalsTranslatedToCurrentFunction = false; - intervalSet.clear(); - } - - void MFRenderingOptions::resetRenderSpecificOptions() { - intervalSet.clear(); - intervalsTranslatedToCurrentFunction = false; - } - - bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { - processOptions(); - - return (renderAllMFs || - mfNamesToRender.find(mf->getFunction()->getName()) != - mfNamesToRender.end()); - } - - const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ - translateRegClassNamesToCurrentFunction(); - return regClassSet; - } - - const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { - translateIntervalNumbersToCurrentFunction(); - return intervalSet; - } - - bool MFRenderingOptions::renderEmptyIndexes() const { - return showEmptyIndexes; - } - - bool MFRenderingOptions::fancyVerticals() const { - return useFancyVerticals; - } - - void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { - if (!regClassesTranslatedToCurrentFunction) { - processOptions(); - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - if (renderAllClasses || - classNamesToRender.find(trc->getName()) != - classNamesToRender.end()) { - regClassSet.insert(trc); - } - } - regClassesTranslatedToCurrentFunction = true; - } - } - - void 
MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { - if (!intervalsTranslatedToCurrentFunction) { - processOptions(); - - // If we're not just doing explicit then do a copy over all matching - // types. - if (intervalTypesToRender != ExplicitOnly) { - for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (filterEmpty && li->empty()) - continue; - - if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && - (intervalTypesToRender & AllPhys))) { - intervalSet.insert(li); - } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { - if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || - ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { - intervalSet.insert(li); - } - } - } - } - - // If we need to process the explicit list... - if (intervalTypesToRender != All) { - for (std::set<std::pair<unsigned, unsigned> >::const_iterator - regRangeItr = intervalNumsToRender.begin(), - regRangeEnd = intervalNumsToRender.end(); - regRangeItr != regRangeEnd; ++regRangeItr) { - const std::pair<unsigned, unsigned> &range = *regRangeItr; - for (unsigned reg = range.first; reg != range.second; ++reg) { - if (lis->hasInterval(reg)) { - intervalSet.insert(&lis->getInterval(reg)); - } - } - } - } - - intervalsTranslatedToCurrentFunction = true; - } - } - - // ---------- TargetRegisterExtraInformation implementation ---------- - - TargetRegisterExtraInfo::TargetRegisterExtraInfo() - : mapsPopulated(false) { - } - - void TargetRegisterExtraInfo::setup(MachineFunction *mf, - MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, - LiveIntervals *lis) { - this->mf = mf; - this->mri = mri; - this->tri = tri; - this->lis = lis; - } - - void TargetRegisterExtraInfo::reset() { - if (!mapsPopulated) { - initWorst(); - //initBounds(); - initCapacity(); - mapsPopulated = true; - } - - resetPressureAndLiveStates(); - } - - void TargetRegisterExtraInfo::clear() 
{ - prWorst.clear(); - vrWorst.clear(); - capacityMap.clear(); - pressureMap.clear(); - //liveStatesMap.clear(); - mapsPopulated = false; - } - - void TargetRegisterExtraInfo::initWorst() { - assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && - "Worst map already initialised?"); - - // Start with the physical registers. - for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { - WorstMapLine &pregLine = prWorst[preg]; - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - - unsigned numOverlaps = 0; - for (TargetRegisterClass::iterator rItr = trc->begin(), - rEnd = trc->end(); - rItr != rEnd; ++rItr) { - unsigned trcPReg = *rItr; - if (tri->regsOverlap(preg, trcPReg)) - ++numOverlaps; - } - - pregLine[trc] = numOverlaps; - } - } - - // Now the register classes. - for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rc1Itr != rcEnd; ++rc1Itr) { - const TargetRegisterClass *trc1 = *rc1Itr; - WorstMapLine &classLine = vrWorst[trc1]; - - for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin(); - rc2Itr != rcEnd; ++rc2Itr) { - const TargetRegisterClass *trc2 = *rc2Itr; - - unsigned worst = 0; - - for (TargetRegisterClass::iterator trc1Itr = trc1->begin(), - trc1End = trc1->end(); - trc1Itr != trc1End; ++trc1Itr) { - unsigned trc1Reg = *trc1Itr; - unsigned trc1RegWorst = 0; - - for (TargetRegisterClass::iterator trc2Itr = trc2->begin(), - trc2End = trc2->end(); - trc2Itr != trc2End; ++trc2Itr) { - unsigned trc2Reg = *trc2Itr; - if (tri->regsOverlap(trc1Reg, trc2Reg)) - ++trc1RegWorst; - } - if (trc1RegWorst > worst) { - worst = trc1RegWorst; - } - } - - if (worst != 0) { - classLine[trc2] = worst; - } - } - } - } - - unsigned TargetRegisterExtraInfo::getWorst( - unsigned reg, - const TargetRegisterClass *trc) const { - const WorstMapLine *wml = 0; - if 
(TargetRegisterInfo::isPhysicalRegister(reg)) { - PRWorstMap::const_iterator prwItr = prWorst.find(reg); - assert(prwItr != prWorst.end() && "Missing prWorst entry."); - wml = &prwItr->second; - } else { - const TargetRegisterClass *regTRC = mri->getRegClass(reg); - VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC); - assert(vrwItr != vrWorst.end() && "Missing vrWorst entry."); - wml = &vrwItr->second; - } - - WorstMapLine::const_iterator wmlItr = wml->find(trc); - if (wmlItr == wml->end()) - return 0; - - return wmlItr->second; - } - - void TargetRegisterExtraInfo::initCapacity() { - assert(!mapsPopulated && capacityMap.empty() && - "Capacity map already initialised?"); - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - unsigned capacity = trc->getRawAllocationOrder(*mf).size(); - - if (capacity != 0) - capacityMap[trc] = capacity; - } - } - - unsigned TargetRegisterExtraInfo::getCapacity( - const TargetRegisterClass *trc) const { - CapacityMap::const_iterator cmItr = capacityMap.find(trc); - assert(cmItr != capacityMap.end() && - "vreg with unallocable register class"); - return cmItr->second; - } - - void TargetRegisterExtraInfo::resetPressureAndLiveStates() { - pressureMap.clear(); - //liveStatesMap.clear(); - - // Iterate over all slots. - - - // Iterate over all live intervals. - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) - continue; - - // For all ranges in the current interal. - for (LiveInterval::iterator lrItr = li->begin(), - lrEnd = li->end(); - lrItr != lrEnd; ++lrItr) { - LiveRange *lr = &*lrItr; - - // For all slots in the current range. 
- for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) { - - // Record increased pressure at index for all overlapping classes. - for (TargetRegisterInfo::regclass_iterator - rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - - if (trc->getRawAllocationOrder(*mf).empty()) - continue; - - unsigned worstAtI = getWorst(li->reg, trc); - - if (worstAtI != 0) { - pressureMap[i][trc] += worstAtI; - } - } - } - } - } - } - - unsigned TargetRegisterExtraInfo::getPressureAtSlot( - const TargetRegisterClass *trc, - SlotIndex i) const { - PressureMap::const_iterator pmItr = pressureMap.find(i); - if (pmItr == pressureMap.end()) - return 0; - const PressureMapLine &pmLine = pmItr->second; - PressureMapLine::const_iterator pmlItr = pmLine.find(trc); - if (pmlItr == pmLine.end()) - return 0; - return pmlItr->second; - } - - bool TargetRegisterExtraInfo::classOverCapacityAtSlot( - const TargetRegisterClass *trc, - SlotIndex i) const { - return (getPressureAtSlot(trc, i) > getCapacity(trc)); - } - - // ---------- MachineFunctionRenderer implementation ---------- - - void RenderMachineFunction::Spacer::print(raw_ostream &os) const { - if (!prettyHTML) - return; - for (unsigned i = 0; i < ns; ++i) { - os << " "; - } - } - - RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const { - return Spacer(ns); - } - - raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) { - s.print(os); - return os; - } - - template <typename Iterator> - std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const { - std::string r; - - for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) { - char c = *sItr; - - switch (c) { - case '<': r.append("<"); break; - case '>': r.append(">"); break; - case '&': r.append("&"); break; - case ' ': r.append(" "); break; - case '\"': r.append("""); break; - default: r.push_back(c); break; - } - } - - return r; 
- } - - RenderMachineFunction::LiveState - RenderMachineFunction::getLiveStateAt(const LiveInterval *li, - SlotIndex i) const { - const MachineInstr *mi = sis->getInstructionFromIndex(i); - - // For uses/defs recorded use/def indexes override current liveness and - // instruction operands (Only for the interval which records the indexes). - // FIXME: This is all wrong, uses and defs share the same slots. - if (i.isEarlyClobber() || i.isRegister()) { - UseDefs::const_iterator udItr = useDefs.find(li); - if (udItr != useDefs.end()) { - const SlotSet &slotSet = udItr->second; - if (slotSet.count(i)) { - if (i.isEarlyClobber()) { - return Used; - } - // else - return Defined; - } - } - } - - // If the slot is a load/store, or there's no info in the use/def set then - // use liveness and instruction operand info. - if (li->liveAt(i)) { - - if (mi == 0) { - if (vrm == 0 || - (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { - return AliveReg; - } else { - return AliveStack; - } - } else { - if (i.isRegister() && mi->definesRegister(li->reg, tri)) { - return Defined; - } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { - return Used; - } else { - if (vrm == 0 || - (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { - return AliveReg; - } else { - return AliveStack; - } - } - } - } - return Dead; - } - - RenderMachineFunction::PressureState - RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc, - SlotIndex i) const { - if (trei.getPressureAtSlot(trc, i) == 0) { - return Zero; - } else if (trei.classOverCapacityAtSlot(trc, i)){ - return High; - } - return Low; - } - - /// \brief Render a machine instruction. 
- void RenderMachineFunction::renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const { - std::string s; - raw_string_ostream oss(s); - oss << *mi; - - os << escapeChars(oss.str()); - } - - template <typename T> - void RenderMachineFunction::renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const { - if (ro.fancyVerticals()) { - os << indent << "<object\n" - << indent + s(2) << "class=\"obj\"\n" - << indent + s(2) << "type=\"image/svg+xml\"\n" - << indent + s(2) << "width=\"14px\"\n" - << indent + s(2) << "height=\"55px\"\n" - << indent + s(2) << "data=\"data:image/svg+xml,\n" - << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n" - << indent + s(6) << "<text x='-55' y='10' " - "font-family='Courier' font-size='12' " - "transform='rotate(-90)' " - "text-rendering='optimizeSpeed' " - "fill='#000'>" << t << "</text>\n" - << indent + s(4) << "</svg>\">\n" - << indent << "</object>\n"; - } else { - std::ostringstream oss; - oss << t; - std::string tStr(oss.str()); - - os << indent; - for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); - tStrItr != tStrEnd; ++tStrItr) { - os << *tStrItr << "<br/>"; - } - os << "\n"; - } - } - - void RenderMachineFunction::insertCSS(const Spacer &indent, - raw_ostream &os) const { - os << indent << "<style type=\"text/css\">\n" - << indent + s(2) << "body { font-color: black; }\n" - << indent + s(2) << "table.code td { font-family: monospace; " - "border-width: 0px; border-style: solid; " - "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n" - << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n" - << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n" - << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n" - << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n" - << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n" - << indent + s(2) << "table.code td.l-u { 
background-color: #ffff00; }\n" - << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n" - << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n" - << indent + s(2) << "table.code th { border-width: 0px; " - "border-style: solid; }\n" - << indent << "</style>\n"; - } - - void RenderMachineFunction::renderFunctionSummary( - const Spacer &indent, raw_ostream &os, - const char * const renderContextStr) const { - os << indent << "<h1>Function: " << mf->getFunction()->getName() - << "</h1>\n" - << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n"; - } - - - void RenderMachineFunction::renderPressureTableLegend( - const Spacer &indent, - raw_ostream &os) const { - os << indent << "<h2>Rendering Pressure Legend:</h2>\n" - << indent << "<table class=\"code\">\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<th>Pressure</th><th>Description</th>" - "<th>Appearance</th>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>No Pressure</td>" - "<td>No physical registers of this class requested.</td>" - "<td class=\"p-z\"> </td>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>Low Pressure</td>" - "<td>Sufficient physical registers to meet demand.</td>" - "<td class=\"p-l\"> </td>\n" - << indent + s(2) << "</tr>\n" - << indent + s(2) << "<tr>\n" - << indent + s(4) << "<td>High Pressure</td>" - "<td>Potentially insufficient physical registers to meet demand.</td>" - "<td class=\"p-h\"> </td>\n" - << indent + s(2) << "</tr>\n" - << indent << "</table>\n"; - } - - template <typename CellType> - void RenderMachineFunction::renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair<CellType, unsigned> &rleAccumulator, - const std::map<CellType, std::string> &cellTypeStrs) const { - - if (rleAccumulator.second == 0) - return; - - typename std::map<CellType, std::string>::const_iterator ctsItr = - 
cellTypeStrs.find(rleAccumulator.first); - - assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); - - os << indent + s(4) << "<td class=\"" << ctsItr->second << "\""; - if (rleAccumulator.second > 1) - os << " colspan=" << rleAccumulator.second; - os << "></td>\n"; - } - - - void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const { - - std::map<LiveState, std::string> lsStrs; - lsStrs[Dead] = "l-n"; - lsStrs[Defined] = "l-d"; - lsStrs[Used] = "l-u"; - lsStrs[AliveReg] = "l-r"; - lsStrs[AliveStack] = "l-s"; - - std::map<PressureState, std::string> psStrs; - psStrs[Zero] = "p-z"; - psStrs[Low] = "p-l"; - psStrs[High] = "p-h"; - - // Open the table... - - os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n" - << indent + s(2) << "<tr>\n"; - - // Render the header row... - - os << indent + s(4) << "<th>index</th>\n" - << indent + s(4) << "<th>instr</th>\n"; - - // Render class names if necessary... - if (!ro.regClasses().empty()) { - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - os << indent + s(4) << "<th>\n"; - renderVertical(indent + s(6), os, trc->getName()); - os << indent + s(4) << "</th>\n"; - } - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? - if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "<th> </th>\n"; - - // Render interval numbers if necessary... 
- if (!ro.intervals().empty()) { - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = *liItr; - os << indent + s(4) << "<th>\n"; - renderVertical(indent + s(6), os, li->reg); - os << indent + s(4) << "</th>\n"; - } - } - - os << indent + s(2) << "</tr>\n"; - - // End header row, start with the data rows... - - MachineInstr *mi = 0; - - // Data rows: - for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); - i = i.getNextSlot()) { - - // Render the slot column. - os << indent + s(2) << "<tr height=6ex>\n"; - - // Render the code column. - if (i.isBlock()) { - MachineBasicBlock *mbb = sis->getMBBFromIndex(i); - mi = sis->getInstructionFromIndex(i); - - if (i == sis->getMBBStartIdx(mbb) || mi != 0 || - ro.renderEmptyIndexes()) { - os << indent + s(4) << "<td rowspan=4>" << i << " </td>\n" - << indent + s(4) << "<td rowspan=4>\n"; - - if (i == sis->getMBBStartIdx(mbb)) { - os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; - } else if (mi != 0) { - os << indent + s(6) << " "; - renderMachineInstr(os, mi); - } else { - // Empty interval - leave blank. - } - os << indent + s(4) << "</td>\n"; - } else { - i = i.getDeadSlot(); // <- Will be incremented to the next index. - continue; - } - } - - // Render the class columns. 
- if (!ro.regClasses().empty()) { - std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0); - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - PressureState newPressure = getPressureStateAt(trc, i); - - if (newPressure == psRLEAccumulator.first) { - ++psRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - psRLEAccumulator.first = newPressure; - psRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? - if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "<td width=2em></td>\n"; - - if (!ro.intervals().empty()) { - std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0); - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - const LiveInterval *li = *liItr; - LiveState newLiveness = getLiveStateAt(li, i); - - if (newLiveness == lsRLEAccumulator.first) { - ++lsRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - lsRLEAccumulator.first = newLiveness; - lsRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - } - os << indent + s(2) << "</tr>\n"; - } - - os << indent << "</table>\n"; - - if (!ro.regClasses().empty()) - renderPressureTableLegend(indent, os); - } - - void RenderMachineFunction::renderFunctionPage( - raw_ostream &os, - const char * const renderContextStr) const { - os << "<html>\n" - << s(2) << "<head>\n" - << s(4) << "<title>" << fqn << "</title>\n"; - - insertCSS(s(4), os); - - os << s(2) << "<head>\n" - << s(2) << "<body >\n"; - - renderFunctionSummary(s(4), os, renderContextStr); - - os << s(4) << 
"<br/><br/><br/>\n"; - - //renderLiveIntervalInfoTable(" ", os); - - os << s(4) << "<br/><br/><br/>\n"; - - renderCodeTablePlusPI(s(4), os); - - os << s(2) << "</body>\n" - << "</html>\n"; - } - - void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tri = mf->getTarget().getRegisterInfo(); - lis = &getAnalysis<LiveIntervals>(); - sis = &getAnalysis<SlotIndexes>(); - - trei.setup(mf, mri, tri, lis); - ro.setup(mf, tri, lis, this); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + - mf->getFunction()->getName().str(); - - return false; - } - - void RenderMachineFunction::releaseMemory() { - trei.clear(); - ro.clear(); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - } - - void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - const MachineInstr *mi = &*rItr; - if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); - } - if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); - } - } - } - - void RenderMachineFunction::rememberSpills( - const LiveInterval *li, - const std::vector<LiveInterval*> &spills) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(), - siEnd = spills.end(); - siItr != siEnd; ++siItr) { - const LiveInterval *spill = *siItr; - spillIntervals[li].insert(spill); - spillFor[spill] = li; - } - } - - 
bool RenderMachineFunction::isSpill(const LiveInterval *li) const { - SpillForMap::const_iterator sfItr = spillFor.find(li); - if (sfItr == spillFor.end()) - return false; - return true; - } - - void RenderMachineFunction::renderMachineFunction( - const char *renderContextStr, - const VirtRegMap *vrm, - const char *renderSuffix) { - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - this->vrm = vrm; - trei.reset(); - - std::string rpFileName(mf->getFunction()->getName().str() + - (renderSuffix ? renderSuffix : "") + - outputFileSuffix); - - std::string errMsg; - raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); - - renderFunctionPage(outFile, renderContextStr); - - ro.resetRenderSpecificOptions(); - } - - std::string RenderMachineFunction::escapeChars(const std::string &s) const { - return escapeChars(s.begin(), s.end()); - } - -} diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h deleted file mode 100644 index 85719923c0..0000000000 --- a/lib/CodeGen/RenderMachineFunction.h +++ /dev/null @@ -1,338 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H -#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H - -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -#include <algorithm> -#include <map> -#include <set> -#include <string> - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - class MachineInstr; - class MachineRegisterInfo; - class RenderMachineFunction; - class TargetRegisterClass; - class TargetRegisterInfo; - class VirtRegMap; - class raw_ostream; - - /// \brief Helper class to process rendering options. Tries to be as lazy as - /// possible. - class MFRenderingOptions { - public: - - struct RegClassComp { - bool operator()(const TargetRegisterClass *trc1, - const TargetRegisterClass *trc2) const { - std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); - return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), - trc2Name.begin(), trc2Name.end()); - } - }; - - typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet; - - struct IntervalComp { - bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { - return li1->reg < li2->reg; - } - }; - - typedef std::set<const LiveInterval*, IntervalComp> IntervalSet; - - /// Initialise the rendering options. - void setup(MachineFunction *mf, const TargetRegisterInfo *tri, - LiveIntervals *lis, const RenderMachineFunction *rmf); - - /// Clear translations of options to the current function. - void clear(); - - /// Reset any options computed for this specific rendering. - void resetRenderSpecificOptions(); - - /// Should we render the current function. - bool shouldRenderCurrentMachineFunction() const; - - /// Return the set of register classes to render pressure for. 
- const RegClassSet& regClasses() const; - - /// Return the set of live intervals to render liveness for. - const IntervalSet& intervals() const; - - /// Render indexes which are not associated with instructions / MBB starts. - bool renderEmptyIndexes() const; - - /// Return whether or not to render using SVG for fancy vertical text. - bool fancyVerticals() const; - - private: - - static bool renderingOptionsProcessed; - static std::set<std::string> mfNamesToRender; - static bool renderAllMFs; - - static std::set<std::string> classNamesToRender; - static bool renderAllClasses; - - - static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender; - typedef enum { ExplicitOnly = 0, - AllPhys = 1, - VirtNoSpills = 2, - VirtSpills = 4, - AllVirt = 6, - All = 7 } - IntervalTypesToRender; - static unsigned intervalTypesToRender; - - template <typename OutputItr> - static void splitComaSeperatedList(const std::string &s, OutputItr outItr); - - static void processOptions(); - - static void processFuncNames(); - static void processRegClassNames(); - static void processIntervalNumbers(); - - static void processIntervalRange(const std::string &intervalRangeStr); - - MachineFunction *mf; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - const RenderMachineFunction *rmf; - - mutable bool regClassesTranslatedToCurrentFunction; - mutable RegClassSet regClassSet; - - mutable bool intervalsTranslatedToCurrentFunction; - mutable IntervalSet intervalSet; - - void translateRegClassNamesToCurrentFunction() const; - - void translateIntervalNumbersToCurrentFunction() const; - }; - - /// \brief Provide extra information about the physical and virtual registers - /// in the function being compiled. - class TargetRegisterExtraInfo { - public: - TargetRegisterExtraInfo(); - - /// \brief Set up TargetRegisterExtraInfo with pointers to necessary - /// sources of information. 
- void setup(MachineFunction *mf, MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, LiveIntervals *lis); - - /// \brief Recompute tables for changed function. - void reset(); - - /// \brief Free all tables in TargetRegisterExtraInfo. - void clear(); - - /// \brief Maximum number of registers from trc which alias reg. - unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; - - /// \brief Returns the number of allocable registers in trc. - unsigned getCapacity(const TargetRegisterClass *trc) const; - - /// \brief Return the number of registers of class trc that may be - /// needed at slot i. - unsigned getPressureAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - /// \brief Return true if the number of registers of type trc that may be - /// needed at slot i is greater than the capacity of trc. - bool classOverCapacityAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - private: - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - - typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine; - typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap; - VRWorstMap vrWorst; - - typedef std::map<unsigned, WorstMapLine> PRWorstMap; - PRWorstMap prWorst; - - typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap; - CapacityMap capacityMap; - - typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine; - typedef std::map<SlotIndex, PressureMapLine> PressureMap; - PressureMap pressureMap; - - bool mapsPopulated; - - /// \brief Initialise the 'worst' table. - void initWorst(); - - /// \brief Initialise the 'capacity' table. - void initCapacity(); - - /// \brief Initialise/Reset the 'pressure' and live states tables. - void resetPressureAndLiveStates(); - }; - - /// \brief Render MachineFunction objects and related information to a HTML - /// page. 
- class RenderMachineFunction : public MachineFunctionPass { - public: - static char ID; - - RenderMachineFunction() : MachineFunctionPass(ID) { - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - void rememberUseDefs(const LiveInterval *li); - - void rememberSpills(const LiveInterval *li, - const std::vector<LiveInterval*> &spills); - - bool isSpill(const LiveInterval *li) const; - - /// \brief Render this machine function to HTML. - /// - /// @param renderContextStr This parameter will be included in the top of - /// the html file to explain where (in the - /// codegen pipeline) this function was rendered - /// from. Set it to something like - /// "Pre-register-allocation". - /// @param vrm If non-null the VRM will be queried to determine - /// whether a virtual register was allocated to a - /// physical register or spilled. - /// @param renderFilePrefix This string will be appended to the function - /// name (before the output file suffix) to enable - /// multiple renderings from the same function. - void renderMachineFunction(const char *renderContextStr, - const VirtRegMap *vrm = 0, - const char *renderSuffix = 0); - - private: - class Spacer; - friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); - - std::string fqn; - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - SlotIndexes *sis; - const VirtRegMap *vrm; - - TargetRegisterExtraInfo trei; - MFRenderingOptions ro; - - - - // Utilities. 
- typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; - LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; - - typedef enum { Zero, Low, High } PressureState; - PressureState getPressureStateAt(const TargetRegisterClass *trc, - SlotIndex i) const; - - typedef std::map<const LiveInterval*, std::set<const LiveInterval*> > - SpillIntervals; - SpillIntervals spillIntervals; - - typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap; - SpillForMap spillFor; - - typedef std::set<SlotIndex> SlotSet; - typedef std::map<const LiveInterval*, SlotSet> UseDefs; - UseDefs useDefs; - - // ---------- Rendering methods ---------- - - /// For inserting spaces when pretty printing. - class Spacer { - public: - explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} - Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } - void print(raw_ostream &os) const; - private: - unsigned ns; - }; - - Spacer s(unsigned ns) const; - - template <typename Iterator> - std::string escapeChars(Iterator sBegin, Iterator sEnd) const; - - /// \brief Render a machine instruction. - void renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const; - - /// \brief Render vertical text. - template <typename T> - void renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const; - - /// \brief Insert CSS layout info. - void insertCSS(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a brief summary of the function (including rendering - /// context). - void renderFunctionSummary(const Spacer &indent, - raw_ostream &os, - const char * const renderContextStr) const; - - /// \brief Render a legend for the pressure table. - void renderPressureTableLegend(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a consecutive set of HTML cells of the same class using - /// the colspan attribute for run-length encoding. 
- template <typename CellType> - void renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair<CellType, unsigned> &rleAccumulator, - const std::map<CellType, std::string> &cellTypeStrs) const; - - /// \brief Render code listing, potentially with register pressure - /// and live intervals shown alongside. - void renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render the HTML page representing the MachineFunction. - void renderFunctionPage(raw_ostream &os, - const char * const renderContextStr) const; - - std::string escapeChars(const std::string &s) const; - }; -} - -#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 24b9cd0b45..110f478f48 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -413,8 +413,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // SSA defs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) - return; + // + // FIXME: This optimization is disabled pending PR13112. + //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + // return; // Add output dependence to the next nearest def of this vreg. 
// diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index a6bdc3be32..75e816720f 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) + +add_dependencies(LLVMSelectionDAG intrinsics_gen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1aee7c572d..f16b1e7f4a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1322,6 +1322,9 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. + // First add the users of this node to the work list so that they + // can be tried again once they have new operands. + AddUsersToWorkList(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); @@ -2524,7 +2527,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); // Replace uses of the EXTLOAD with the new ZEXTLOAD. - CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. 
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } } // Fold the AND away, taking care not to fold to the old load node if we @@ -5010,6 +5020,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT PtrType = N0.getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + // It's not possible to generate a constant of extended or untyped type. + return SDValue(); + // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { @@ -5604,7 +5618,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } - // fold (fadd c1, c2) -> (fadd c1, c2) + // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS @@ -5633,6 +5647,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // FADD -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegal(ISD::FMA, VT)) { + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + // Note: Commutes FADD operands. 
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + } + return SDValue(); } @@ -5690,6 +5724,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // FSUB -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegal(ISD::FMA, VT)) { + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + } + } + return SDValue(); } @@ -7071,7 +7128,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Tmp; switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP type"); - case MVT::f80: // We don't do this for these yet. + case MVT::f16: // We don't do this for these yet. 
+ case MVT::f80: case MVT::f128: case MVT::ppcf128: break; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 6464cf4ecc..07dc36577f 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,6 +40,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -51,7 +52,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8dde919079..3e18ea7ac9 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -15,13 +15,13 @@ #define DEBUG_TYPE "function-lowering-info" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a57d47ffb3..f154271894 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -470,6 +470,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), 
TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); } else { // VReg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c2972b051f..a36137db4a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -20,10 +24,6 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1930,9 +1930,11 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, return TLI.getLibcallName(LC) != 0; } -/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// useDivRem - Only issue divrem libcall if both quotient and remainder are /// needed. -static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { +static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { + // The other use might have been replaced with a divrem already. + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; unsigned OtherOpcode = 0; if (isSigned) OtherOpcode = isDIV ? 
ISD::SREM : ISD::SDIV; @@ -1946,7 +1948,7 @@ static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { SDNode *User = *UI; if (User == Node) continue; - if (User->getOpcode() == OtherOpcode && + if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && User->getOperand(0) == Op0 && User->getOperand(1) == Op1) return true; @@ -3092,7 +3094,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, false))) { + useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y @@ -3120,7 +3122,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, true))) + useDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 0ae6651075..106b086184 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -511,6 +511,7 @@ private: void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); @@ -555,6 +556,7 @@ private: // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned OpNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 9fe4480d11..704f99bcf0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -71,6 +71,9 @@ class VectorLegalizer { // operands to a different type and bitcasting the result back to the // original type. SDValue PromoteVectorOp(SDValue Op); + // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input + // operand to the next size up. + SDValue PromoteVectorOpINT_TO_FP(SDValue Op); public: bool Run(); @@ -231,9 +234,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; + switch (Op.getOpcode()) { + default: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + Result = PromoteVectorOpINT_TO_FP(Op); + Changed = true; + break; + } break; case TargetLowering::Legal: break; case TargetLowering::Custom: { @@ -293,6 +306,44 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. 
+ EVT VT = Op.getOperand(0).getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + + // Normal getTypeToPromoteTo() doesn't work here, as that will promote + // by widening the vector w/ the same element width and twice the number + // of elements. We want the other way around, the same number of elements, + // each twice the width. + // + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); + assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + + DebugLoc dl = Op.getDebugLoc(); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : + ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], + Operands.size()); +} + SDValue VectorLegalizer::ExpandLoad(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5f23f01daf..d09411c42f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -115,6 +115,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: R = ScalarizeVecRes_BinOp(N); break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; } // If R is null, the sub-method took care of registering the result. 
@@ -129,6 +132,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Op0.getValueType(), Op0, Op1, Op2); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -529,6 +540,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -548,6 +562,22 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. 
The input may be either a vector or a diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ff0877c981..a8dce2176f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -14,16 +14,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 88a6baf02f..e5d24e9d18 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" @@ -42,7 +43,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -1828,9 +1828,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; const Value *Callee(I.getCalledValue()); + const Function *Fn = 
dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) visitInlineAsm(&I); - else + else if (Fn && Fn->isIntrinsic()) { + assert(Fn->getIntrinsicID() == Intrinsic::donothing); + return; // ignore invokes to @llvm.donothing + } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it @@ -2048,7 +2052,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !TLI.getTargetMachine().Options.DisableJumpTables && + return TLI.supportJumpTables() && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -4934,7 +4938,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType()); - if (TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isOperationLegal(ISD::FMA, VT) && + TLI.isFMAFasterThanMulAndAdd(VT)){ setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -5176,6 +5182,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::lifetime_end: // Discard region information. return 0; + case Intrinsic::donothing: + // ignore + return 0; // @LOCALMOD-BEGIN // Native Client Intrinsics for TLS setup / layout. case Intrinsic::nacl_thread_stack_padding: { @@ -6272,8 +6281,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); - assert(!OpInfo.isIndirect && - "Don't know how to handle indirect register inputs yet!"); + + // TODO: Support this. 
+ if (OpInfo.isIndirect) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "Don't know how to handle indirect register inputs yet " + "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 69ba83ed27..5233479701 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Assembly/Writer.h" @@ -19,7 +20,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e3d054d4ce..f02e895fcb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,12 +14,8 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -27,7 +23,10 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include 
"llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -38,6 +37,7 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6cde05aea8..173ffac329 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -13,13 +13,13 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2020cc7f7b..6212918d6c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -612,6 +612,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; + SupportJumpTables = true; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0016047a13..8a6b120f97 100644 --- 
a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -26,13 +26,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "shadowstackgc" -#include "llvm/CodeGen/GCs.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 9a86f32d8f..980bd7414c 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -13,28 +13,28 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sjljehprepare" -#include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Scalar.h" +#include 
"llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <set> using namespace llvm; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 7af08f591f..54be88a8bb 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -560,8 +560,8 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, return 1; unsigned Class = MI->getDesc().getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) + int UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps >= 0) return UOps; // The # of u-ops is dynamically determined. The specific target should diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index bdc27485a7..babfacb782 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -365,10 +365,17 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { if (Priority == 65535) return StaticCtorSection; - std::string Name = std::string(".ctors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } } const MCSection * @@ -378,10 +385,35 @@ TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { if (Priority == 65535) return StaticDtorSection; - std::string Name = std::string(".dtors.") + utostr(65535 - Priority); - return 
getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } +} + +void +TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { + UseInitArray = UseInitArray_; + if (!UseInitArray) + return; + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index ec2b577230..153f8711d8 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1456,6 +1456,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { "two address instruction invalid"); unsigned regB = mi->getOperand(SrcIdx).getReg(); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (mi->getOperand(SrcIdx).isUndef()) { + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + // Constrain the DstReg register class if required. 
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, MF)) + MRI->constrainRegClass(DstReg, RC); + mi->getOperand(SrcIdx).setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi); + continue; + } TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); } @@ -1523,7 +1536,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // Emit a copy or rematerialize the definition. bool isCopy = false; const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getVRegDef(regB); + MachineInstr *DefMI = MRI->getUniqueVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && @@ -1609,19 +1622,20 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); - } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+ if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); } // Clear TiedOperands here instead of at the top of the loop @@ -1708,9 +1722,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, continue; // Check that the instructions are all in the same basic block. - MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); - MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); - if (SrcDefMI->getParent() != DstDefMI->getParent()) + MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg); + if (!SrcDefMI || !DstDefMI || + SrcDefMI->getParent() != DstDefMI->getParent()) continue; // If there are no other uses than copies which feed into @@ -1846,6 +1861,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallVector<unsigned, 4> RealSrcs; SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + // Nothing needs to be inserted for <undef> operands. 
+ if (MI->getOperand(i).isUndef()) { + MI->getOperand(i).setReg(0); + continue; + } unsigned SrcReg = MI->getOperand(i).getReg(); unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); @@ -1855,7 +1875,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *DefMI = NULL; if (!MI->getOperand(i).getSubReg() && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DefMI = MRI->getVRegDef(SrcReg); + DefMI = MRI->getUniqueVRegDef(SrcReg); } if (DefMI && DefMI->isImplicitDef()) { diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index 24bf97ff60..2683990e58 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -82,7 +82,7 @@ void DWARFCompileUnit::clear() { Abbrevs = 0; AddrSize = 0; BaseAddr = 0; - DieArray.clear(); + clearDIEs(false); } void DWARFCompileUnit::dump(raw_ostream &OS) { @@ -201,7 +201,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { } void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) { - if (DieArray.size() > 1) { + if (DieArray.size() > (unsigned)keep_compile_unit_die) { // std::vectors never get any smaller when resized to a smaller size, // or when clear() or erase() are called, the size will report that it // is smaller, but the memory allocated remains intact (call capacity() @@ -227,8 +227,8 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, // all compile units to stay loaded when they weren't needed. So we can end // up parsing the DWARF and then throwing them all away to keep memory usage // down. 
- const bool clear_dies = extractDIEsIfNeeded(false) > 1; - + const bool clear_dies = extractDIEsIfNeeded(false) > 1 && + clear_dies_if_already_not_parsed; DieArray[0].buildAddressRangeTable(this, debug_aranges); // Keep memory down by clearing DIEs if this generate function @@ -236,3 +236,13 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, if (clear_dies) clearDIEs(true); } + +const DWARFDebugInfoEntryMinimal* +DWARFCompileUnit::getFunctionDIEForAddress(int64_t address) { + size_t n = extractDIEsIfNeeded(false); + for (size_t i = 0; i != n; i++) { + if (DieArray[i].addressRangeContainsAddress(this, address)) + return &DieArray[i]; + } + return 0; +} diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index d9167292a9..dc558da714 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -104,6 +104,11 @@ public: void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, bool clear_dies_if_already_not_parsed); + /// getFunctionDIEForAddress - Returns pointer to parsed subprogram DIE, + /// address ranges of which contain the provided address, + /// or NULL if there is no such subprogram. The pointer + /// is valid until DWARFCompileUnit::clear() or clearDIEs() is called. + const DWARFDebugInfoEntryMinimal *getFunctionDIEForAddress(int64_t address); }; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index dccadc4ea4..6be230e73a 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -140,30 +140,42 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { return 0; } -DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) { +DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, + DILineInfoSpecifier specifier) { // First, get the offset of the compile unit. uint32_t cuOffset = getDebugAranges()->findAddress(address); // Retrieve the compile unit. 
DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); if (!cu) - return DILineInfo("<invalid>", 0, 0); - // Get the line table for this compile unit. - const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); - if (!lineTable) - return DILineInfo("<invalid>", 0, 0); - // Get the index of the row we're looking for in the line table. - uint64_t hiPC = - cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_high_pc, - -1ULL); - uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); - if (rowIndex == -1U) - return DILineInfo("<invalid>", 0, 0); - - // From here, contruct the DILineInfo. - const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; - const std::string &fileName = lineTable->Prologue.FileNames[row.File-1].Name; - - return DILineInfo(fileName.c_str(), row.Line, row.Column); + return DILineInfo(); + const char *fileName = "<invalid>"; + const char *functionName = "<invalid>"; + uint32_t line = 0; + uint32_t column = 0; + if (specifier.needs(DILineInfoSpecifier::FunctionName)) { + const DWARFDebugInfoEntryMinimal *function_die = + cu->getFunctionDIEForAddress(address); + if (function_die) + functionName = function_die->getSubprogramName(cu); + } + if (specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + // Get the line table for this compile unit. + const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); + if (lineTable) { + // Get the index of the row we're looking for in the line table. + uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned( + cu, DW_AT_high_pc, -1ULL); + uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); + if (rowIndex != -1U) { + const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; + // Take file/line info from the line table. 
+ fileName = lineTable->Prologue.FileNames[row.File - 1].Name.c_str(); + line = row.Line; + column = row.Column; + } + } + } + return DILineInfo(fileName, functionName, line, column); } void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index d2e763a87a..e55a27e698 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -66,7 +66,8 @@ public: const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); - virtual DILineInfo getLineInfoForAddress(uint64_t address); + virtual DILineInfo getLineInfoForAddress(uint64_t address, + DILineInfoSpecifier specifier = DILineInfoSpecifier()); bool isLittleEndian() const { return IsLittleEndian; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index 1788145356..ef470e5799 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -93,6 +93,7 @@ bool DWARFDebugAranges::generate(DWARFContext *ctx) { cu->buildAddressRangeTable(this, true); } } + sort(true, /* overlap size */ 0); return !isEmpty(); } @@ -221,4 +222,3 @@ bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const { HiPC = Aranges.back().HiPC(); return true; } - diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 236db97c44..1024b45255 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -440,3 +440,51 @@ DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, } } } + +bool +DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( + const DWARFCompileUnit *cu, const uint64_t address) const { + if (!isNULL() && getTag() == DW_TAG_subprogram) { + uint64_t hi_pc = -1ULL; + uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); + if (lo_pc != -1ULL) + hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); + if (hi_pc != -1ULL) { + return 
(lo_pc <= address && address < hi_pc); + } + } + return false; +} + +static inline const char* +getSubprogramNameFromDie(const DWARFCompileUnit *cu, + const DWARFDebugInfoEntryMinimal *die) { + const char *result = 0; + if (!die->isNULL() && die->getTag() == DW_TAG_subprogram) { + // Try to get mangled name if possible. + result = die->getAttributeValueAsString(cu, DW_AT_MIPS_linkage_name, 0); + if (result == 0) + result = die->getAttributeValueAsString(cu, DW_AT_linkage_name, 0); + if (result == 0) + result = die->getAttributeValueAsString(cu, DW_AT_name, 0); + } + return result; +} + +const char* +DWARFDebugInfoEntryMinimal::getSubprogramName( + const DWARFCompileUnit *cu) const { + if (isNULL() || getTag() != DW_TAG_subprogram) + return 0; + const char *name = getSubprogramNameFromDie(cu, this); + if (name == 0) { + // Try to get name from specification DIE. + uint32_t ref = getAttributeValueAsReference(cu, DW_AT_specification, -1U); + if (ref != -1U) { + DWARFDebugInfoEntryMinimal spec_die; + if (spec_die.extract(cu, &ref)) + name = getSubprogramNameFromDie(cu, &spec_die); + } + } + return name; +} diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index 37b3bcdd96..1a040a53a3 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -128,6 +128,13 @@ public: void buildAddressRangeTable(const DWARFCompileUnit *cu, DWARFDebugAranges *debug_aranges) const; + + bool addressRangeContainsAddress(const DWARFCompileUnit *cu, + const uint64_t address) const; + + // If a DIE represents a subroutine, returns its mangled name + // (or short name, if mangled is missing). Otherwise returns null. 
+ const char* getSubprogramName(const DWARFCompileUnit *cu) const; }; } diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h index 1c07c94714..911d1d68b2 100644 --- a/lib/ExecutionEngine/EventListenerCommon.h +++ b/lib/ExecutionEngine/EventListenerCommon.h @@ -14,8 +14,8 @@ #ifndef EVENT_LISTENER_COMMON_H #define EVENT_LISTENER_COMMON_H +#include "llvm/DebugInfo.h" #include "llvm/Metadata.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Path.h" diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 5dfa78f34a..56cea42a4f 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -16,11 +16,11 @@ #include "llvm/ExecutionEngine/JITEventListener.h" #define DEBUG_TYPE "amplifier-jit-event-listener" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Metadata.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" #include "llvm/Support/Debug.h" diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt index d331f830b6..74df8f0f37 100644 --- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -15,3 +15,5 @@ add_llvm_library(LLVMInterpreter if( LLVM_ENABLE_FFI ) target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} ) endif() + +add_dependencies(LLVMInterpreter intrinsics_gen) diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 89c35438ef..5202b09165 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ 
b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -651,20 +651,40 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { // Check to see if any of the cases match... BasicBlock *Dest = 0; for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) { - IntegersSubset Case = i.getCaseValueEx(); - for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) { - IntegersSubset::Range r = Case.getItem(n); + IntegersSubset& Case = i.getCaseValueEx(); + if (Case.isSingleNumber()) { // FIXME: Currently work with ConstantInt based numbers. - const ConstantInt *LowCI = r.getLow().toConstantInt(); - const ConstantInt *HighCI = r.getHigh().toConstantInt(); - GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF); - GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF); - if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 && - executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) { + const ConstantInt *CI = Case.getSingleNumber(0).toConstantInt(); + GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF); + if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) { Dest = cast<BasicBlock>(i.getCaseSuccessor()); break; } } + if (Case.isSingleNumbersOnly()) { + for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) { + // FIXME: Currently work with ConstantInt based numbers. + const ConstantInt *CI = Case.getSingleNumber(n).toConstantInt(); + GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF); + if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) { + Dest = cast<BasicBlock>(i.getCaseSuccessor()); + break; + } + } + } else + for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) { + IntegersSubset::Range r = Case.getItem(n); + // FIXME: Currently work with ConstantInt based numbers. 
+ const ConstantInt *LowCI = r.getLow().toConstantInt(); + const ConstantInt *HighCI = r.getHigh().toConstantInt(); + GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF); + GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF); + if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 && + executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) { + Dest = cast<BasicBlock>(i.getCaseSuccessor()); + break; + } + } } if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default SwitchToNewBasicBlock(Dest, SF); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index acbb20b1b2..d8bbc01d3e 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -17,9 +17,9 @@ #include "JITDwarfEmitter.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Constants.h" -#include "llvm/Module.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Module.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineCodeInfo.h" @@ -112,13 +112,18 @@ namespace { /// particular GlobalVariable so that we can reuse them if necessary. GlobalToIndirectSymMapTy GlobalToIndirectSymMap; +#ifndef NDEBUG /// Instance of the JIT this ResolverState serves. 
JIT *TheJIT; +#endif public: JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this), - TheJIT(jit) {} + FunctionToCallSitesMap(this) { +#ifndef NDEBUG + TheJIT = jit; +#endif + } FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index e6142e3678..6b8e9d1954 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -16,9 +16,9 @@ #include "llvm/ExecutionEngine/JITEventListener.h" #define DEBUG_TYPE "oprofile-jit-event-listener" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/Support/Debug.h" diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index aec2547f00..2ed3c288c1 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -684,7 +684,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *NG = new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV, - DstGV->isThreadLocal(), + DstGV->getThreadLocalMode(), DstGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. @@ -759,7 +759,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()), SGV->isConstant(), SGV->getLinkage(), /*init*/0, SGV->getName(), /*insertbefore*/0, - SGV->isThreadLocal(), + SGV->getThreadLocalMode(), SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. 
copyGVAttributes(NewDGV, SGV); diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 6d6d23a893..6a3e16d985 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1066,11 +1066,19 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, entry.Index += LocalSymbolData.size(); if (is64Bit()) { String64(*F, entry.r_offset); + if (TargetObjectWriter->isN64()) { + String32(*F, entry.Index); - struct ELF::Elf64_Rela ERE64; - ERE64.setSymbolAndType(entry.Index, entry.Type); - String64(*F, ERE64.r_info); - + String8(*F, TargetObjectWriter->getRSsym(entry.Type)); + String8(*F, TargetObjectWriter->getRType3(entry.Type)); + String8(*F, TargetObjectWriter->getRType2(entry.Type)); + String8(*F, TargetObjectWriter->getRType(entry.Type)); + } + else { + struct ELF::Elf64_Rela ERE64; + ERE64.setSymbolAndType(entry.Index, entry.Type); + String64(*F, ERE64.r_info); + } if (hasRelocationAddend()) String64(*F, entry.r_addend); } else { diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 9f98cb32e9..94be557b52 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -85,10 +85,8 @@ MCAsmInfo::MCAsmInfo() { SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; DwarfUsesInlineInfoSection = false; - DwarfRequiresRelocationForSectionOffset = true; DwarfSectionOffsetDirective = 0; - DwarfUsesLabelOffsetForRanges = true; - DwarfUsesRelocationsForStringPool = true; + DwarfUsesRelocationsAcrossSections = true; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; } diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 73ef7ba060..8e0ac23efc 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -59,7 +59,5 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HasNoDeadStrip = true; HasSymbolResolver = true; - DwarfRequiresRelocationForSectionOffset = false; - DwarfUsesLabelOffsetForRanges = false; - DwarfUsesRelocationsForStringPool = false; + 
DwarfUsesRelocationsAcrossSections = false; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 1371396632..e731d95916 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -171,7 +171,7 @@ public: unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0); @@ -536,7 +536,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 52d4ab5e70..229b2d6960 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -705,7 +705,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) { MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); MCSymbol *AbbrevSectionSymbol; - if (AsmInfo.doesDwarfRequireRelocationForSectionOffset()) { + if (AsmInfo.doesDwarfUseRelocationsAcrossSections()) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } else { @@ -1285,7 +1285,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, 0); if (verboseAsm) streamer.AddComment("FDE CIE Offset"); streamer.EmitAbsValue(offset, 4); - } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) { + } else if (!asmInfo.doesDwarfUseRelocationsAcrossSections()) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart, cieStart, 0); streamer.EmitAbsValue(offset, 4); diff --git a/lib/MC/MCELFObjectTargetWriter.cpp 
b/lib/MC/MCELFObjectTargetWriter.cpp index 171ab4d9bf..6eb6914f4b 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -15,9 +15,11 @@ using namespace llvm; MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, uint16_t EMachine_, - bool HasRelocationAddend_) + bool HasRelocationAddend_, + bool IsN64_) : OSABI(OSABI_), EMachine(EMachine_), - HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) { + HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_), + IsN64(IsN64_){ } /// Default e_flags = 0 diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index b25f8efcd4..bf35ab7fc3 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -91,7 +91,7 @@ public: unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) { + uint64_t Size = 0, unsigned ByteAlignment = 0) { llvm_unreachable("ELF doesn't support this directive"); } virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 970aa8baf4..b75fe2c3a7 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -74,7 +74,7 @@ public: llvm_unreachable("macho doesn't support this directive"); } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); @@ -326,7 +326,7 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { MCSectionData &SectData = 
getAssembler().getOrCreateSectionData(*Section); // The symbol may not be present, which only creates the section. diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index f669faad14..46579d7b1f 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -63,7 +63,7 @@ namespace { virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) {} + uint64_t Size = 0, unsigned ByteAlignment = 0) {} virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index eefb3e1ad4..3bf03ad4b9 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -206,6 +206,9 @@ private: void EatToEndOfStatement(); + bool ParseMacroArgument(MacroArgument &MA); + bool ParseMacroArguments(const Macro *M, std::vector<MacroArgument> &A); + /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. 
@@ -280,6 +283,8 @@ private: void InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS); bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept" + bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" + bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" }; @@ -1299,6 +1304,10 @@ bool AsmParser::ParseStatement() { // Macro-like directives if (IDVal == ".rept") return ParseDirectiveRept(IDLoc); + if (IDVal == ".irp") + return ParseDirectiveIrp(IDLoc); + if (IDVal == ".irpc") + return ParseDirectiveIrpc(IDLoc); if (IDVal == ".endr") return ParseDirectiveEndr(IDLoc); @@ -1562,44 +1571,76 @@ MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL, { } -bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, - const Macro *M) { - // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate - // this, although we should protect against infinite loops. - if (ActiveMacros.size() == 20) - return TokError("macros cannot be nested more than 20 levels deep"); - - // Parse the macro instantiation arguments. - std::vector<MacroArgument> MacroArguments; - MacroArguments.push_back(MacroArgument()); +/// ParseMacroArgument - Extract AsmTokens for a macro argument. +/// This is used for both default macro parameter values and the +/// arguments in macro invocations +bool AsmParser::ParseMacroArgument(MacroArgument &MA) { unsigned ParenLevel = 0; + for (;;) { - if (Lexer.is(AsmToken::Eof)) + SMLoc LastTokenLoc; + + if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) return TokError("unexpected token in macro instantiation"); + + // HandleMacroEntry relies on not advancing the lexer here + // to be able to fill in the remaining default parameter values if (Lexer.is(AsmToken::EndOfStatement)) break; + if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) + break; - // If we aren't inside parentheses and this is a comma, start a new token - // list. 
- if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) { - MacroArguments.push_back(MacroArgument()); - } else { - // Adjust the current parentheses level. - if (Lexer.is(AsmToken::LParen)) - ++ParenLevel; - else if (Lexer.is(AsmToken::RParen) && ParenLevel) - --ParenLevel; - - // Append the token to the current argument list. - MacroArguments.back().push_back(getTok()); - } + // Adjust the current parentheses level. + if (Lexer.is(AsmToken::LParen)) + ++ParenLevel; + else if (Lexer.is(AsmToken::RParen) && ParenLevel) + --ParenLevel; + + // Append the token to the current argument list. + MA.push_back(getTok()); Lex(); } - // If the last argument didn't end up with any tokens, it's not a real - // argument and we should remove it from the list. This happens with either - // a tailing comma or an empty argument list. - if (MacroArguments.back().empty()) - MacroArguments.pop_back(); + if (ParenLevel != 0) + return TokError("unbalanced parenthesises in macro argument"); + return false; +} + +// Parse the macro instantiation arguments. +bool AsmParser::ParseMacroArguments(const Macro *M, + std::vector<MacroArgument> &A) { + const unsigned NParameters = M ? M->Parameters.size() : 0; + + // Parse two kinds of macro invocations: + // - macros defined without any parameters accept an arbitrary number of them + // - macros defined with parameters accept at most that many of them + for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; + ++Parameter) { + MacroArgument MA; + + if (ParseMacroArgument(MA)) + return true; + + if (!MA.empty()) + A.push_back(MA); + if (Lexer.is(AsmToken::EndOfStatement)) + return false; + + if (Lexer.is(AsmToken::Comma)) + Lex(); + } + return TokError("Too many arguments"); +} + +bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, + const Macro *M) { + // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate + // this, although we should protect against infinite loops. 
+ if (ActiveMacros.size() == 20) + return TokError("macros cannot be nested more than 20 levels deep"); + + std::vector<MacroArgument> MacroArguments; + if (ParseMacroArguments(M, MacroArguments)) + return true; // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. @@ -3308,6 +3349,107 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { return false; } +/// ParseDirectiveIrp +/// ::= .irp symbol,values +bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { + std::vector<StringRef> Parameters; + StringRef Parameter; + + if (ParseIdentifier(Parameter)) + return TokError("expected identifier in '.irp' directive"); + + Parameters.push_back(Parameter); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma in '.irp' directive"); + + Lex(); + + std::vector<MacroArgument> A; + if (ParseMacroArguments(0, A)) + return true; + + // Eat the end of statement. + Lex(); + + // Lex the irp definition. + Macro *M = ParseMacroLikeBody(DirectiveLoc); + if (!M) + return true; + + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. 
+ SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + for (std::vector<MacroArgument>::iterator i = A.begin(), e = A.end(); i != e; + ++i) { + std::vector<MacroArgument> Args; + Args.push_back(*i); + + if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) + return true; + } + + InstantiateMacroLikeBody(M, DirectiveLoc, OS); + + return false; +} + +/// ParseDirectiveIrpc +/// ::= .irpc symbol,values +bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { + std::vector<StringRef> Parameters; + StringRef Parameter; + + if (ParseIdentifier(Parameter)) + return TokError("expected identifier in '.irpc' directive"); + + Parameters.push_back(Parameter); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma in '.irpc' directive"); + + Lex(); + + std::vector<MacroArgument> A; + if (ParseMacroArguments(0, A)) + return true; + + if (A.size() != 1 || A.front().size() != 1) + return TokError("unexpected token in '.irpc' directive"); + + // Eat the end of statement. + Lex(); + + // Lex the irpc definition. + Macro *M = ParseMacroLikeBody(DirectiveLoc); + if (!M) + return true; + + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. 
+ SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + StringRef Values = A.front().front().getString(); + std::size_t I, End = Values.size(); + for (I = 0; I < End; ++I) { + MacroArgument Arg; + Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); + + std::vector<MacroArgument> Args; + Args.push_back(Arg); + + if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) + return true; + } + + InstantiateMacroLikeBody(M, DirectiveLoc, OS); + + return false; +} + bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { if (ActiveMacros.empty()) return TokError("unexpected '.endr' directive, no current .rept"); diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp index a770c97438..9ccab93067 100644 --- a/lib/MC/MCPureStreamer.cpp +++ b/lib/MC/MCPureStreamer.cpp @@ -39,7 +39,7 @@ public: virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, @@ -144,7 +144,7 @@ void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { } void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { report_fatal_error("not yet implemented in pure streamer"); } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 67dc649d49..b026277ac6 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -67,7 +67,7 @@ public: virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned 
ByteAlignment); + uint64_t Size,unsigned ByteAlignment); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); @@ -324,7 +324,7 @@ void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned ByteAlignment) { + uint64_t Size,unsigned ByteAlignment) { llvm_unreachable("not implemented"); } diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index bd27a56e73..8ab54c6295 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -622,6 +622,28 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol, return object_error::success; } +ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData( + const coff_symbol *symbol) const { + const uint8_t *aux = NULL; + + if ( symbol->NumberOfAuxSymbols > 0 ) { + // AUX data comes immediately after the symbol in COFF + aux = reinterpret_cast<const uint8_t *>(symbol + 1); +# ifndef NDEBUG + // Verify that the aux symbol points to a valid entry in the symbol table. 
+ uintptr_t offset = uintptr_t(aux) - uintptr_t(base()); + if (offset < Header->PointerToSymbolTable + || offset >= Header->PointerToSymbolTable + + (Header->NumberOfSymbols * sizeof(coff_symbol))) + report_fatal_error("Aux Symbol data was outside of symbol table."); + + assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol) + == 0 && "Aux Symbol data did not point to the beginning of a symbol"); +# endif + } + return ArrayRef<uint8_t>(aux, symbol->NumberOfAuxSymbols * sizeof(coff_symbol)); +} + error_code COFFObjectFile::getSectionName(const coff_section *Sec, StringRef &Res) const { StringRef Name; @@ -694,6 +716,20 @@ error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, return object_error::success; } +const coff_section *COFFObjectFile::getCOFFSection(section_iterator &It) const { + return toSec(It->getRawDataRefImpl()); +} + +const coff_symbol *COFFObjectFile::getCOFFSymbol(symbol_iterator &It) const { + return toSymb(It->getRawDataRefImpl()); +} + +const coff_relocation *COFFObjectFile::getCOFFRelocation( + relocation_iterator &It) const { + return toRel(It->getRawDataRefImpl()); +} + + #define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \ case COFF::enum: res = #enum; break; diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index e7d8483128..91d086b689 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -248,6 +248,12 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const { return ConstantRange(Lower - Val, Upper - Val); } +/// \brief Subtract the specified range from this range (aka relative complement +/// of the sets). +ConstantRange ConstantRange::difference(const ConstantRange &CR) const { + return intersectWith(CR.inverse()); +} + /// intersectWith - Return the range that results from the intersection of this /// range with another range. 
The resultant range is guaranteed to include all /// elements contained in both input ranges, and to have the smallest possible @@ -316,7 +322,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { return CR; } - if (CR.Upper.ult(Lower)) { + if (CR.Upper.ule(Lower)) { if (CR.Lower.ult(Lower)) return *this; diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index 18c658173a..dd218f6099 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -52,7 +52,7 @@ std::string StrError(int errnum) { # endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" if (errnum) - strerror_s(buffer, errnum); + strerror_s(buffer, MaxErrStrLen - 1, errnum); #elif defined(HAVE_STRERROR) // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 677da5cd9f..550fa5765c 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/DataStream.h" @@ -450,6 +451,60 @@ std::string sys::getHostCPUName() { .Case("POWER7", "pwr7") .Default(generic); } +#elif defined(__linux__) && defined(__arm__) +std::string sys::getHostCPUName() { + // The cpuid register on arm is not accessible from user space. On Linux, + // it is exposed through the /proc/cpuinfo file. + // Note: We cannot mmap /proc/cpuinfo here and then process the resulting + // memory buffer because the 'file' has 0 size (it can be read from only + // as a stream). + + std::string Err; + DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); + if (!DS) { + DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n"); + return "generic"; + } + + // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line + // in all cases. 
+ char buffer[1024]; + size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer)); + delete DS; + + StringRef Str(buffer, CPUInfoSize); + + SmallVector<StringRef, 32> Lines; + Str.split(Lines, "\n"); + + // Look for the CPU implementer line. + StringRef Implementer; + for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("CPU implementer")) + Implementer = Lines[I].substr(15).ltrim("\t :"); + + if (Implementer == "0x41") // ARM Ltd. + // Look for the CPU part line. + for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("CPU part")) + // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The + // values correspond to the "Part number" in the CP15/c0 register. The + // contents are specified in the various processor manuals. + return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) + .Case("0x926", "arm926ej-s") + .Case("0xb02", "mpcore") + .Case("0xb36", "arm1136j-s") + .Case("0xb56", "arm1156t2-s") + .Case("0xb76", "arm1176jz-s") + .Case("0xc08", "cortex-a8") + .Case("0xc09", "cortex-a9") + .Case("0xc20", "cortex-m0") + .Case("0xc23", "cortex-m3") + .Case("0xc24", "cortex-m4") + .Default("generic"); + + return "generic"; +} #else std::string sys::getHostCPUName() { return "generic"; diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 90672b68f7..816f7c223b 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -17,6 +17,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" @@ -214,6 +215,14 @@ error_code MemoryBuffer::getFile(const char *Filename, OwningPtr<MemoryBuffer> &result, int64_t FileSize, bool RequiresNullTerminator) { + // First check that the "file" is not a directory + bool is_dir = false; + error_code err = 
sys::fs::is_directory(Filename, is_dir); + if (err) + return err; + if (is_dir) + return make_error_code(errc::is_a_directory); + int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index dcddeda977..db4a56b692 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -60,8 +60,11 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case '\177': if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { - if (length >= 18 && magic[17] == 0) - switch (magic[16]) { + bool Data2MSB = magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (length >= 18 && magic[high] == 0) + switch (magic[low]) { default: break; case 1: return ELF_Relocatable_FileType; case 2: return ELF_Executable_FileType; diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index e2a69a650d..46571c049f 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -744,6 +744,8 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) { /// @brief Identify the magic in magic. 
file_magic identify_magic(StringRef magic) { + if (magic.size() < 4) + return file_magic::unknown; switch ((unsigned char)magic[0]) { case 0xDE: // 0x0B17C0DE = BC wraper if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamableMemoryObject.cpp index c23f07b8fc..fe3752a77a 100644 --- a/lib/Support/StreamableMemoryObject.cpp +++ b/lib/Support/StreamableMemoryObject.cpp @@ -20,7 +20,7 @@ class RawMemoryObject : public StreamableMemoryObject { public: RawMemoryObject(const unsigned char *Start, const unsigned char *End) : FirstChar(Start), LastChar(End) { - assert(LastChar > FirstChar && "Invalid start/end range"); + assert(LastChar >= FirstChar && "Invalid start/end range"); } virtual uint64_t getBase() const { return 0; } diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index c131fe07f4..c2fc261df3 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -189,7 +189,7 @@ void StringMapImpl::RehashTable() { // grow/rehash the table. 
if (NumItems*4 > NumBuckets*3) { NewSize = NumBuckets*2; - } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) { + } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { return; diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index 109580478d..0587aaec7e 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -30,10 +30,12 @@ void ThreadLocalImpl::setInstance(const void* d) { void **pd = reinterpret_cast<void**>(&data); *pd = const_cast<void*>(d); } -const void* ThreadLocalImpl::getInstance() { return data; } -void ThreadLocalImpl::removeInstance() { +const void* ThreadLocalImpl::getInstance() { void **pd = reinterpret_cast<void**>(&data); - *pd = 0; + return *pd; +} +void ThreadLocalImpl::removeInstance() { + setInstance(0); } } #else diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 822ada7d9b..7b26ea9b42 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -62,7 +62,12 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case mblaze: return "mblaze"; - case hexagon: return "hexagon"; + case mips: + case mipsel: + case mips64: + case mips64el:return "mips"; + + case hexagon: return "hexagon"; case r600: return "r600"; diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 4e71b42be9..62d9a2fb07 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -24,6 +24,9 @@ #if HAVE_FCNTL_H #include <fcntl.h> #endif +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif #if HAVE_DIRENT_H # include <dirent.h> # define NAMLEN(dirent) strlen((dirent)->d_name) @@ -351,20 +354,22 @@ error_code status(const Twine &path, file_status &result) { return ec; } + perms prms = static_cast<perms>(status.st_mode & perms_mask); + if (S_ISDIR(status.st_mode)) - result = file_status(file_type::directory_file); + result = file_status(file_type::directory_file, prms); else if (S_ISREG(status.st_mode)) - result = 
file_status(file_type::regular_file); + result = file_status(file_type::regular_file, prms); else if (S_ISBLK(status.st_mode)) - result = file_status(file_type::block_file); + result = file_status(file_type::block_file, prms); else if (S_ISCHR(status.st_mode)) - result = file_status(file_type::character_file); + result = file_status(file_type::character_file, prms); else if (S_ISFIFO(status.st_mode)) - result = file_status(file_type::fifo_file); + result = file_status(file_type::fifo_file, prms); else if (S_ISSOCK(status.st_mode)) - result = file_status(file_type::socket_file); + result = file_status(file_type::socket_file, prms); else - result = file_status(file_type::type_unknown); + result = file_status(file_type::type_unknown, prms); result.fs_st_dev = status.st_dev; result.fs_st_ino = status.st_ino; @@ -372,6 +377,35 @@ error_code status(const Twine &path, file_status &result) { return error_code::success(); } +// Modifies permissions on a file. +error_code permissions(const Twine &path, perms prms) { + if ((prms & add_perms) && (prms & remove_perms)) + llvm_unreachable("add_perms and remove_perms are mutually exclusive"); + + // Get current permissions + file_status info; + if (error_code ec = status(path, info)) { + return ec; + } + + // Set updated permissions. + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + perms permsToSet; + if (prms & add_perms) { + permsToSet = (info.permissions() | prms) & perms_mask; + } else if (prms & remove_perms) { + permsToSet = (info.permissions() & ~prms) & perms_mask; + } else { + permsToSet = prms & perms_mask; + } + if (::chmod(p.begin(), static_cast<mode_t>(permsToSet))) { + return error_code(errno, system_category()); + } + + return error_code::success(); +} + // Since this is most often used for temporary files, mode defaults to 0600. 
error_code unique_file(const Twine &model, int &result_fd, SmallVectorImpl<char> &result_path, @@ -525,6 +559,36 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = map_writable ? O_RDWR : O_RDONLY; + int ofd = ::open(name.begin(), oflags); + if ( ofd == -1 ) + return error_code(errno, system_category()); + AutoFD fd(ofd); + int flags = map_writable ? MAP_SHARED : MAP_PRIVATE; + int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ; +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + result = ::mmap(0, size, prot, flags, fd, file_offset); + if (result == MAP_FAILED) { + return error_code(errno, system_category()); + } + + return error_code::success(); +} + +error_code unmap_file_pages(void *base, size_t size) { + if ( ::munmap(base, size) == -1 ) + return error_code(errno, system_category()); + + return error_code::success(); +} + + } // end namespace fs } // end namespace sys } // end namespace llvm diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 130b11b93d..35f01802ba 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -15,6 +15,7 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Mutex.h" +#include <string> #include <vector> #include <algorithm> #if HAVE_EXECINFO_H @@ -43,7 +44,7 @@ static SmartMutex<true> SignalsMutex; /// InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; -static std::vector<sys::Path> FilesToRemove; +static std::vector<std::string> FilesToRemove; static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun; // IntSigs - Signals that may interrupt the program at any time. 
@@ -121,10 +122,20 @@ static void UnregisterHandlers() { /// RemoveFilesToRemove - Process the FilesToRemove list. This function /// should be called with the SignalsMutex lock held. +/// NB: This must be an async signal safe function. It cannot allocate or free +/// memory, even in debug builds. static void RemoveFilesToRemove() { - while (!FilesToRemove.empty()) { - FilesToRemove.back().eraseFromDisk(true); - FilesToRemove.pop_back(); + // Note: avoid iterators in case of debug iterators that allocate or release + // memory. + for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) { + // Note that we don't want to use any external code here, and we don't care + // about errors. We're going to try as hard as we can as often as we need + // to to make these files go away. If these aren't files, too bad. + // + // We do however rely on a std::string implementation for which repeated + // calls to 'c_str()' don't allocate memory. We pre-call 'c_str()' on all + // of these strings to try to ensure this is safe. + unlink(FilesToRemove[i].c_str()); } } @@ -184,7 +195,19 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { SignalsMutex.acquire(); - FilesToRemove.push_back(Filename); + std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0]; + FilesToRemove.push_back(Filename.str()); + + // We want to call 'c_str()' on every std::string in this vector so that if + // the underlying implementation requires a re-allocation, it happens here + // rather than inside of the signal handler. If we see the vector grow, we + // have to call it on every entry. If it remains in place, we only need to + // call it on the latest one. 
+ if (OldPtr == &FilesToRemove[0]) + FilesToRemove.back().c_str(); + else + for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) + FilesToRemove[i].c_str(); SignalsMutex.release(); @@ -195,10 +218,19 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, // DontRemoveFileOnSignal - The public API void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) { SignalsMutex.acquire(); - std::vector<sys::Path>::reverse_iterator I = - std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename); - if (I != FilesToRemove.rend()) - FilesToRemove.erase(I.base()-1); + std::vector<std::string>::reverse_iterator RI = + std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename.str()); + std::vector<std::string>::iterator I = FilesToRemove.end(); + if (RI != FilesToRemove.rend()) + I = FilesToRemove.erase(RI.base()-1); + + // We need to call c_str() on every element which would have been moved by + // the erase. These elements, in a C++98 implementation where c_str() + // requires a reallocation on the first call may have had the call to c_str() + // made on insertion become invalid by being copied down an element. + for (std::vector<std::string>::iterator E = FilesToRemove.end(); I != E; ++I) + I->c_str(); + SignalsMutex.release(); } diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 4868b184b8..66eeab058f 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -497,6 +497,41 @@ handle_status_error: return error_code::success(); } + +// Modifies permissions on a file. +error_code permissions(const Twine &path, perms prms) { +#if 0 // verify code below before enabling: + // If the permissions bits are not trying to modify + // "write" permissions, there is nothing to do. 
+ if (!(prms & (owner_write|group_write|others_write))) + return error_code::success(); + + SmallString<128> path_storage; + SmallVector<wchar_t, 128> path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); + + if (prms & add_perms) { + attributes &= ~FILE_ATTRIBUTE_READONLY; + } + else if (prms & remove_perms) { + attributes |= FILE_ATTRIBUTE_READONLY; + } + else { + assert(0 && "neither add_perms or remove_perms is set"); + } + + if ( ! ::SetFileAttributesW(path_utf16.begin(), attributes)) + return windows_error(::GetLastError()); +#endif + return error_code::success(); +} + + // FIXME: mode should be used here and default to user r/w only, // it currently comes in as a UNIX mode. error_code unique_file(const Twine &model, int &result_fd, @@ -755,6 +790,19 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) { return error_code::success(); } +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; +} + +error_code unmap_file_pages(void *base, size_t size) { + assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; +} + + + } // end namespace fs } // end namespace sys } // end namespace llvm diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp index 89d3f5e81e..7c8367ab9d 100644 --- a/lib/TableGen/TableGenBackend.cpp +++ b/lib/TableGen/TableGenBackend.cpp @@ -11,13 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/TableGenBackend.h" using namespace llvm; +static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill, + StringRef Suffix) { + uint64_t Pos = OS.tell(); + OS << Prefix; + for (unsigned i = 
OS.tell() - Pos, e = 80 - Suffix.size(); i != e; ++i) + OS << Fill; + OS << Suffix << '\n'; +} + void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) { - OS << "//===- TableGen'erated file -------------------------------------*-" - " C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate" - "d file, do not edit!\n//\n//===------------------------------------" - "----------------------------------===//\n\n"; + printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "|* " + Desc, ' ', "*|"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "|* Automatically generated file, do not edit!", ' ', "*|"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "\\*===", '-', "===*/"); + OS << '\n'; } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 10d7f56c7f..7af8b9d909 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, GenericItineraries, Features>; + : Processor<Name, NoItineraries, Features>; // V4 Processors. def : ProcNoItin<"generic", []>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 967c0a8462..76cd0c389d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,8 +23,8 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" @@ -515,7 +515,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. 
+ default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); case 'a': // Print as a memory address. if (MI->getOperand(OpNum).isReg()) { O << "[" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0811d226b4..08e55429ce 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1738,26 +1738,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } +/// analyzeCompare - For a comparison instruction, return the source registers +/// in SrcReg and SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. bool ARMBaseInstrInfo:: -AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, - int &CmpValue) const { +analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const { switch (MI->getOpcode()) { default: break; case ARM::CMPri: case ARM::t2CMPri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; case ARM::CMPrr: case ARM::t2CMPrr: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = MI->getOperand(1).getReg(); CmpMask = ~0; CmpValue = 0; return true; case ARM::TSTri: case ARM::t2TSTri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = MI->getOperand(1).getImm(); CmpValue = 0; return true; @@ -1795,21 +1802,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, return false; } -/// OptimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. Convert -/// the SUBrr(r1,r2)|Subri(r1,CmpValue) instruction into one that sets the flags -/// register and remove the CMPrr(r1,r2)|CMPrr(r2,r1)|CMPri(r1,CmpValue) -/// instruction. 
-bool ARMBaseInstrInfo:: -OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). +inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::EQ: return ARMCC::EQ; + case ARMCC::NE: return ARMCC::NE; + case ARMCC::HS: return ARMCC::LS; + case ARMCC::LO: return ARMCC::HI; + case ARMCC::HI: return ARMCC::LO; + case ARMCC::LS: return ARMCC::HS; + case ARMCC::GE: return ARMCC::LE; + case ARMCC::LT: return ARMCC::GT; + case ARMCC::GT: return ARMCC::LT; + case ARMCC::LE: return ARMCC::GE; + } +} + +/// isRedundantFlagInstr - check whether the first instruction, whose only +/// purpose is to update flags, can be made redundant. +/// CMPrr can be made redundant by SUBrr if the operands are the same. +/// CMPri can be made redundant by SUBri if the operands are the same. +/// This function can be extended later on. +inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, + unsigned SrcReg2, int ImmValue, + MachineInstr *OI) { + if ((CmpI->getOpcode() == ARM::CMPrr || + CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::SUBrr || + OI->getOpcode() == ARM::t2SUBrr) && + ((OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getReg() == SrcReg2) || + (OI->getOperand(1).getReg() == SrcReg2 && + OI->getOperand(2).getReg() == SrcReg))) + return true; - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. 
- return false; + if ((CmpI->getOpcode() == ARM::CMPri || + CmpI->getOpcode() == ARM::t2CMPri) && + (OI->getOpcode() == ARM::SUBri || + OI->getOpcode() == ARM::t2SUBri) && + OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getImm() == ImmValue) + return true; + return false; +} - MachineInstr *MI = &*DI; +/// optimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register; +/// Remove a redundant Compare instruction if an earlier instruction can set the +/// flags in the same way as Compare. +/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two +/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the +/// condition code of instructions which use the flags. +bool ARMBaseInstrInfo:: +optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const { + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { @@ -1840,13 +1893,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). MachineInstr *Sub = NULL; - unsigned SrcReg2 = 0; - if (CmpInstr->getOpcode() == ARM::CMPrr || - CmpInstr->getOpcode() == ARM::t2CMPrr) { - SrcReg2 = CmpInstr->getOperand(1).getReg(); + if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = NULL; - } else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { + else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. 
@@ -1859,40 +1909,19 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. At the same time, search for Sub. + const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) - return false; - if (!MO.isReg()) continue; - + if (Instr.modifiesRegister(ARM::CPSR, TRI) || + Instr.readsRegister(ARM::CPSR, TRI)) // This instruction modifies or uses CPSR after the one we want to // change. We can't do this transformation. - if (MO.getReg() == ARM::CPSR) - return false; - } - - // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1). - if (SrcReg2 != 0 && - (Instr.getOpcode() == ARM::SUBrr || - Instr.getOpcode() == ARM::t2SUBrr) && - ((Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getReg() == SrcReg2) || - (Instr.getOperand(1).getReg() == SrcReg2 && - Instr.getOperand(2).getReg() == SrcReg))) { - Sub = &*I; - break; - } + return false; - // Check whether the current instruction is SUBri(r1, CmpValue). - if ((CmpInstr->getOpcode() == ARM::CMPri || - CmpInstr->getOpcode() == ARM::t2CMPri) && - Instr.getOpcode() == ARM::SUBri && CmpValue != 0 && - Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getImm() == CmpValue) { + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { Sub = &*I; break; } @@ -1950,7 +1979,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // CPSR use (i.e. used in another block), then it's not safe to perform // the optimization. // When checking against Sub, we handle the condition codes GE, LT, GT, LE. 
- SmallVector<MachineOperand*, 4> OperandsToUpdate; + SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> + OperandsToUpdate; bool isSafe = false; I = CmpInstr; E = CmpInstr->getParent()->end(); @@ -1971,30 +2001,20 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, } // Condition code is after the operand before CPSR. ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); - if (Sub) - switch (CC) { - default: + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) return false; - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: - case ARMCC::HS: - case ARMCC::LS: - case ARMCC::HI: - case ARMCC::LO: - case ARMCC::EQ: - case ARMCC::NE: - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based - // on CMP needs to be updated to be based on SUB. - // Push the condition code operands to OperandsToUpdate. - // If it is safe to remove CmpInstr, the condition code of these - // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(&((*I).getOperand(IO-1))); - break; - } + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. + if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) + OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), + NewCC)); + } else switch (CC) { default: @@ -2024,26 +2044,9 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Modify the condition code of operands in OperandsToUpdate. 
// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. - for (unsigned i = 0; i < OperandsToUpdate.size(); i++) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)OperandsToUpdate[i]->getImm(); - ARMCC::CondCodes NewCC; - switch (CC) { - default: llvm_unreachable("only expecting less/greater comparisons here"); - case ARMCC::GE: NewCC = ARMCC::LE; break; - case ARMCC::LT: NewCC = ARMCC::GT; break; - case ARMCC::GT: NewCC = ARMCC::LT; break; - case ARMCC::LE: NewCC = ARMCC::GE; break; - case ARMCC::HS: NewCC = ARMCC::LS; break; - case ARMCC::LS: NewCC = ARMCC::HS; break; - case ARMCC::HI: NewCC = ARMCC::LO; break; - case ARMCC::LO: NewCC = ARMCC::HI; break; - case ARMCC::EQ: - case ARMCC::NE: - NewCC = CC; - break; - } - OperandsToUpdate[i]->setImm(NewCC); - } + for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + return true; } } @@ -2175,9 +2178,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; + int ItinUOps = ItinData->getNumMicroOps(Class); + if (ItinUOps >= 0) + return ItinUOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2251,19 +2254,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - UOps = (NumRegs / 2); + int A8UOps = (NumRegs / 2); if (NumRegs % 2) - ++UOps; - return UOps; + ++A8UOps; + return A8UOps; } else if (Subtarget.isCortexA9()) { - UOps = (NumRegs / 2); + int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. 
if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++UOps; - return UOps; + ++A9UOps; + return A9UOps; } else { // Assume the worst. return NumRegs; @@ -2763,11 +2766,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned NewUseIdx; const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, Reg, NewUseIdx, UseAdj); - if (NewUseMI) { - UseMI = NewUseMI; - UseIdx = NewUseIdx; - UseMCID = &UseMI->getDesc(); - } + if (!NewUseMI) + return -1; + + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); } if (Reg == ARM::CPSR) { @@ -2795,6 +2799,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } + if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) + return -1; + unsigned DefAlign = DefMI->hasOneMemOperand() ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() @@ -3015,9 +3022,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, return 1; // If the second MI is predicated, then there is an implicit use dependency. - int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI, - DepMI->getNumOperands()); - return (Latency <= 0) ? 1 : Latency; + return getInstrLatency(ItinData, DefMI); } unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, @@ -3054,9 +3059,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, unsigned Class = MCID.getSchedClass(); // For instructions with variable uops, use uops as latency. - if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) { + if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) return getNumMicroOps(ItinData, MI); - } + // For the common case, fall back on the itinerary's latency. 
unsigned Latency = ItinData->getStageLatency(Class); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 8217f239d1..1a10a4ab1c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -186,16 +186,20 @@ public: return NumCycles == 1; } - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - int &CmpMask, int &CmpValue) const; - - /// OptimizeCompareInstr - Convert the instruction to set the zero flag so - /// that we can remove a "comparison with zero". - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, - int CmpMask, int CmpValue, + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const; + + /// optimizeCompareInstr - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero"; Remove a redundant CMP + /// instruction if the flags can be updated in the same way by an earlier + /// instruction such as SUB. 
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; /// FoldImmediate - 'Reg' is known to be defined by a move immediate diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c386a01e89..3650e1fb77 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -471,22 +471,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0); + MIB.addReg(D0, getUndefRegState(SrcIsUndef)); if (NumRegs > 1 && TableEntry->copyAllListRegs) - MIB.addReg(D1); + MIB.addReg(D1, getUndefRegState(SrcIsUndef)); if (NumRegs > 2 && TableEntry->copyAllListRegs) - MIB.addReg(D2); + MIB.addReg(D2, getUndefRegState(SrcIsUndef)); if (NumRegs > 3 && TableEntry->copyAllListRegs) - MIB.addReg(D3); + MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) // Add an implicit kill for the super-reg. + if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); // Transfer memoperands. 
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2158b7e028..ff660210ea 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -177,7 +177,6 @@ class ARMFastISel : public FastISel { bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1361,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); - return true; + return true; } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -1740,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { // type and the target independent selector doesn't know how to handle it. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; - + unsigned Opc; switch (ISDOpcode) { default: return false; @@ -2146,7 +2145,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; // Can't handle non-double multi-reg retvals. - if (RetVT != MVT::isVoid && RetVT != MVT::i32) { + if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); @@ -2352,7 +2351,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addReg(CalleeReg); else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); - else + else MIB.addExternalSymbol(IntrMemName, 0); } else { if (UseReg) @@ -2365,7 +2364,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Explicitly adding the predicate here. 
AddDefaultPred(MIB); } - + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2486,10 +2485,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return true; } } - + if (!MTI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) return false; @@ -2501,13 +2500,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // Don't handle volatile. if (MSI.isVolatile()) return false; - + if (!MSI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MSI.getDestAddressSpace() > 255) return false; - + return SelectCall(&I, "memset"); } case Intrinsic::trap: { @@ -2518,7 +2517,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { } bool ARMFastISel::SelectTrunc(const Instruction *I) { - // The high bits for a type smaller than the register size are assumed to be + // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); @@ -2709,7 +2708,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, // See if we can handle this address. Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - + unsigned ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e1c89e0c42..238b79e1f1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6561,11 +6561,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize; + unsigned ldrOpc, strOpc, UnitSize = 0; const TargetRegisterClass *TRC = isThumb2 ? 
(const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC_Vec = 0; if (Align & 1) { ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; @@ -6576,10 +6577,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; - UnitSize = 4; + // Check whether we can use NEON instructions. + if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if ((Align % 16 == 0) && SizeVal >= 16) { + ldrOpc = ARM::VLD1q32wb_fixed; + strOpc = ARM::VST1q32wb_fixed; + UnitSize = 16; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; + } + else if ((Align % 8 == 0) && SizeVal >= 8) { + ldrOpc = ARM::VLD1d32wb_fixed; + strOpc = ARM::VST1d32wb_fixed; + UnitSize = 8; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; + } + } + // Can't use NEON instructions. + if (UnitSize == 0) { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } } + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6590,10 +6611,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(destIn).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch) .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); @@ -6739,8 +6767,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(TRC); - if (isThumb2) { + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(destPhi).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); @@ -7113,9 +7147,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 
(const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); @@ -7132,12 +7163,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); - // insert a movs at the end of BB - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), - NewMovDstReg) - .addReg(ABSSrcReg, RegState::Kill) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::CPSR, RegState::Define); + // insert a cmp at the end of BB + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg).addImm(0)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -7149,7 +7178,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(NewMovDstReg, RegState::Kill) + .addReg(ABSSrcReg, RegState::Kill) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); // insert PHI in SinkBB, @@ -7157,7 +7186,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) - .addReg(NewMovDstReg).addMBB(BB); + .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI->eraseFromParent(); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index b8f607eb4c..31b0c41f08 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) /// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { if (hasNOP()) { - NopInst.setOpcode(ARM::NOP); + NopInst.setOpcode(ARM::HINT); + NopInst.addOperand(MCOperand::CreateImm(0)); NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); NopInst.addOperand(MCOperand::CreateReg(0)); } else { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 611d9194fd..6a14871bb0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -244,7 +244,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. -def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; @@ -265,9 +266,9 @@ class RegConstraint<string C> { // ARM specific transformation functions and pattern fragments. // -// so_imm_neg_XFORM - Return a so_imm value packed into the format described for -// so_imm_neg def below. -def so_imm_neg_XFORM : SDNodeXForm<imm, [{ +// imm_neg_XFORM - Return a imm value packed into the format described for +// imm_neg defs below. 
+def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; @@ -286,7 +287,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ int64_t Value = -(int)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; - }], so_imm_neg_XFORM> { + }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -599,7 +600,10 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { } /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } +def Imm0_15AsmOperand: ImmAsmOperand { + let Name = "Imm0_15"; + let DiagnosticType = "ImmRange0_15"; +} def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; }]> { @@ -644,6 +648,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_65535AsmOperand; } +// imm0_65535_neg - An immediate whose negative value is in the range [0.65535]. +def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{ + return -Imm >= 0 && -Imm < 65536; +}]>; + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression. 
// @@ -1640,33 +1649,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000000; +def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, + "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; } -def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000001; -} - -def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000010; -} - -def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000011; -} +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1679,18 +1673,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", let Inst{27-20} = 0b01101000; let Inst{7-4} = 0b1011; let Inst{11-8} = 0b1111; - let Unpredictable{11-8} = 0b1111; } -def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", - []>, Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - 
let Inst{7-0} = 0b00000100; -} - -// The i32imm operand $val can be used by a debugger to store more information +// The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", []>, Requires<[IsARM]> { @@ -3243,6 +3229,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; +def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 66daa1cb69..fec61d2390 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -32,9 +32,6 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{ let ParserMatchClass = ThumbSRImmAsmOperand; } -def imm_neg_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); -}]>; def imm_comp_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 58119baea5..7ea96772aa 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32); }]>; +// so_imm_notSext_XFORM - Return a so_imm value packed into the format +// described for so_imm_notSext def below, with sign extension from 16 +// bits. 
+def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{ + APInt apIntN = N->getAPIntValue(); + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32); +}]>; + // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. @@ -86,6 +95,17 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ let ParserMatchClass = t2_so_imm_not_asmoperand; } +// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm +// if the upper 16 bits are zero. +def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{ + APInt apIntN = N->getAPIntValue(); + if (!apIntN.isIntN(16)) return false; + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1; + }], t2_so_imm_notSext16_XFORM> { + let ParserMatchClass = t2_so_imm_not_asmoperand; +} + // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ @@ -1911,11 +1931,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), + (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + let AddedComplexity = 1 in def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm), + (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // The with-carry-in form matches bitwise not instead of the negation. 
// Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. @@ -1924,6 +1949,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), (t2SBCri rGPR:$src, imm0_255_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only @@ -2134,8 +2161,8 @@ defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) -def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), - (t2RORrr rGPR:$lhs, rGPR:$rhs)>; +def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, @@ -2332,6 +2359,17 @@ let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; +// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise +def top16Zero: PatLeaf<(i32 rGPR:$src), [{ + return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + }]>; + +// so_imm_notSext is needed instead of so_imm_not, as the value of imm +// will match the extended, not the original bitWidth for $src. +def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm), + (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>; + + // FIXME: Disable this pattern on Darwin to workaround an assembler bug. 
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, @@ -3426,21 +3464,18 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -class T2I_hint<bits<8> op7_0, string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, []> { - let Inst{31-20} = 0xf3a; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - let Inst{10-8} = 0b000; - let Inst{7-0} = op7_0; -} - -def t2NOP : T2I_hint<0b00000000, "nop", ".w">; -def t2YIELD : T2I_hint<0b00000001, "yield", ".w">; -def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; -def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; -def t2SEV : T2I_hint<0b00000100, "sev", ".w">; +def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ + bits<8> imm; + let Inst{31-8} = 0b111100111010111110000000; + let Inst{7-0} = imm; +} + +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; +def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; +def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; +def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>; +def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>; def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { bits<4> opt; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index dccbffa4c9..4e2cda433b 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1207,6 +1207,14 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics + +// (fma x, y, (fneg z)) -> (vfnms z, x, y)) +def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; 
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 45486fd0b6..81d2fa37c2 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass; def IIC_iLoad_d_i : InstrItinClass; def IIC_iLoad_d_r : InstrItinClass; def IIC_iLoad_d_ru : InstrItinClass; -def IIC_iLoad_m : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded -def IIC_iPop : InstrItinClass<0>; // micro-coded -def IIC_iPop_Br : InstrItinClass<0>; // micro-coded +def IIC_iLoad_m : InstrItinClass; +def IIC_iLoad_mu : InstrItinClass; +def IIC_iLoad_mBr : InstrItinClass; +def IIC_iPop : InstrItinClass; +def IIC_iPop_Br : InstrItinClass; def IIC_iLoadiALU : InstrItinClass; def IIC_iStore_i : InstrItinClass; def IIC_iStore_r : InstrItinClass; @@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass; def IIC_iStore_d_i : InstrItinClass; def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; -def IIC_iStore_m : InstrItinClass<0>; // micro-coded -def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_iStore_m : InstrItinClass; +def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; @@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded -def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded +def IIC_fpLoad_m : InstrItinClass; +def IIC_fpLoad_mu : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStore_m : InstrItinClass<0>; // micro-coded -def IIC_fpStore_mu : 
InstrItinClass<0>; // micro-coded +def IIC_fpStore_m : InstrItinClass; +def IIC_fpStore_mu : InstrItinClass; def IIC_VLD1 : InstrItinClass; def IIC_VLD1x2 : InstrItinClass; def IIC_VLD1x3 : InstrItinClass; @@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], [], []>; - include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index eb1083ca23..61de00a208 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -155,28 +155,30 @@ def CortexA8Itineraries : MultiIssueItineraries< // Load multiple, def is the 5th operand. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 2, 1, 1, 3]>, + [1, 2, 1, 1, 3], [], -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 3], [], -1>, // dynamic uops // // Push, def is the 3th operand. 
InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 1, 3]>, - + [1, 1, 3], [], -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -231,12 +233,13 @@ def CortexA8Itineraries : MultiIssueItineraries< // Store multiple. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>]>, + InstrStage<2, [A8_LSPipe]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [2]>, - + InstrStage<2, [A8_LSPipe]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, @@ -397,14 +400,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 2], [], -1>, // dynamic uops // // FP Load Multiple + update InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 2], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -423,15 +428,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, 
[A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, - + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 1], [], -1>, // dynamic uops // NEON // Issue through integer pipeline, and execute in NEON unit. // diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index a00577bf3d..1677ba6a98 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -284,7 +284,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -292,7 +293,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -301,7 +303,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -309,7 +312,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop + branch, def is the 3rd operand. 
InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -318,8 +322,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, - + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -413,14 +417,15 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>]>, + InstrStage<2, [A9_LSUnit]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], [2]>, - + InstrStage<2, [A9_LSUnit]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, @@ -717,7 +722,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Load Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. 
@@ -726,7 +732,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -753,7 +760,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -762,7 +770,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // NEON // VLD1 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 99ed63293d..b12607b206 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -143,22 +143,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - PM->add(createGlobalMergePass(TM->getTargetLowering())); + addPass(createGlobalMergePass(TM->getTargetLowering())); return false; } bool ARMPassConfig::addInstSelector() { - PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel())); + addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store 
optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) - PM->add(createARMLoadStoreOptimizationPass(true)); + addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) - PM->add(createMLxExpansionPass()); + addPass(createMLxExpansionPass()); return true; } @@ -166,23 +166,23 @@ bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) { - PM->add(createARMLoadStoreOptimizationPass()); + addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } if (getARMSubtarget().hasNEON()) - PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. - PM->add(createARMExpandPseudoPass()); + addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) - addPass(IfConverterID); + addPass(&IfConverterID); } if (getARMSubtarget().isThumb2()) - PM->add(createThumb2ITBlockPass()); + addPass(createThumb2ITBlockPass()); return true; } @@ -190,10 +190,10 @@ bool ARMPassConfig::addPreSched2() { bool ARMPassConfig::addPreEmitPass() { if (getARMSubtarget().isThumb2()) { if (!getARMSubtarget().prefers32BitThumb()) - PM->add(createThumb2SizeReductionPass()); + addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - addPass(UnpackMachineBundlesID); + addPass(&UnpackMachineBundlesID); } // @LOCALMOD-START @@ -205,12 +205,12 @@ bool ARMPassConfig::addPreEmitPass() { } // @LOCALMOD-END - PM->add(createARMConstantIslandPass()); + addPass(createARMConstantIslandPass()); // @LOCALMOD-START // This pass does all the heavy sfi lifting. 
if (getARMSubtarget().isTargetNaCl()) { - PM->add(createARMNaClRewritePass()); + addPass(createARMNaClRewritePass()); } // @LOCALMOD-END diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 00c495b89a..22db332f2b 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -24,20 +24,11 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); - isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); + InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); //LSDASection = NULL; } @@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } - -const MCSection * -ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); - - if (Priority == 65535) - return StaticCtorSection; - - // Emit ctors in priority order. - std::string Name = std::string(".init_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} - -const MCSection * -ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); - - if (Priority == 65535) - return StaticDtorSection; - - // Emit dtors in priority order. 
- std::string Name = std::string(".fini_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index ff21060414..c6a7261439 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,7 +20,6 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; - bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -32,9 +31,6 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } - - const MCSection * getStaticCtorSection(unsigned Priority) const; - const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2fae489371..68f128189f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -236,7 +236,10 @@ public: Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, Match_RequiresNotITBlock, Match_RequiresV6, - Match_RequiresThumb2 + Match_RequiresThumb2, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "ARMGenAsmMatcher.inc" + }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) @@ -3253,10 +3256,11 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef OptStr = Tok.getString(); - unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size())) + unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, 
OptStr.size()).lower()) .Case("sy", ARM_MB::SY) .Case("st", ARM_MB::ST) .Case("sh", ARM_MB::ISH) @@ -3284,7 +3288,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef IFlagsStr = Tok.getString(); // An iflags string of "none" is interpreted to mean that none of the AIF @@ -3353,22 +3358,22 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { .Case("xpsr_nzcvq", 0x803) .Case("xpsr_g", 0x403) .Case("xpsr_nzcvqg", 0xc03) - .Case("ipsr", 5) - .Case("epsr", 6) - .Case("iepsr", 7) - .Case("msp", 8) - .Case("psp", 9) - .Case("primask", 16) - .Case("basepri", 17) - .Case("basepri_max", 18) - .Case("faultmask", 19) - .Case("control", 20) + .Case("ipsr", 0x805) + .Case("epsr", 0x806) + .Case("iepsr", 0x807) + .Case("msp", 0x808) + .Case("psp", 0x809) + .Case("primask", 0x810) + .Case("basepri", 0x811) + .Case("basepri_max", 0x812) + .Case("faultmask", 0x813) + .Case("control", 0x814) .Default(~0U); if (FlagsVal == ~0U) return MatchOperand_NoMatch; - if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. 
return MatchOperand_NoMatch; @@ -7410,6 +7415,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_ImmRange0_15: { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); + } } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 92c5d92ff7..bf74a9df3b 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -51,6 +51,8 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) +add_dependencies(LLVMARMCodeGen intrinsics_gen) + # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 9eda04d776..e97f4c7430 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) { unsigned Opcode = MI->getOpcode(); + // Check for HINT instructions w/ canonical names. + if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) { + switch (MI->getOperand(0).getImm()) { + case 0: O << "\tnop"; break; + case 1: O << "\tyield"; break; + case 2: O << "\twfe"; break; + case 3: O << "\twfi"; break; + case 4: O << "\tsev"; break; + default: + // Anything else should just print normally. + printInstruction(MI, O); + printAnnotation(O, Annot); + return; + } + printPredicateOperand(MI, 1, O); + if (Opcode == ARM::t2HINT) + O << ".w"; + printAnnotation(O, Annot); + return; + } + // Check for MOVs and print canonical forms, instead. if (Opcode == ARM::MOVsr) { // FIXME: Thumb variants? 
@@ -736,16 +757,26 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr case 0x403: O << "xpsr_g"; return; case 0xc03: O << "xpsr_nzcvqg"; return; - case 5: O << "ipsr"; return; - case 6: O << "epsr"; return; - case 7: O << "iepsr"; return; - case 8: O << "msp"; return; - case 9: O << "psp"; return; - case 16: O << "primask"; return; - case 17: O << "basepri"; return; - case 18: O << "basepri_max"; return; - case 19: O << "faultmask"; return; - case 20: O << "control"; return; + case 5: + case 0x805: O << "ipsr"; return; + case 6: + case 0x806: O << "epsr"; return; + case 7: + case 0x807: O << "iepsr"; return; + case 8: + case 0x808: O << "msp"; return; + case 9: + case 0x809: O << "psp"; return; + case 0x10: + case 0x810: O << "primask"; return; + case 0x11: + case 0x811: O << "basepri"; return; + case 0x12: + case 0x812: O << "basepri_max"; return; + case 0x13: + case 0x813: O << "faultmask"; return; + case 0x14: + case 0x814: O << "control"; return; } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 8dee1b1d6a..4d922d9b44 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -179,9 +179,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; - case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_blx: - case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_ARM_PLT: Type = ELF::R_ARM_PLT32; @@ -193,6 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_arm_movt_hi16: @@ -253,10 +253,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case 
ARM::fixup_arm_thumb_cp: case ARM::fixup_arm_thumb_br: llvm_unreachable("Unimplemented"); - case ARM::fixup_arm_uncondbranch: - Type = ELF::R_ARM_CALL; - break; case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_arm_movt_hi16: diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index cf4f796ec2..1f8ca8681c 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -24,5 +24,7 @@ add_llvm_target(CellSPUCodeGen SPUNopFiller.cpp ) +add_dependencies(LLVMCellSPUCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 14021fef05..03d5a9ae0c 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -301,7 +301,9 @@ bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'L': // Write second word of DImode reference. // Verify that this operand has two consecutive registers. if (!MI->getOperand(OpNo).isReg() || diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 3b90261fe6..54764f133c 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -72,7 +72,7 @@ TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) { bool SPUPassConfig::addInstSelector() { // Install an instruction selector. 
- PM->add(createSPUISelDag(getSPUTargetMachine())); + addPass(createSPUISelDag(getSPUTargetMachine())); return false; } @@ -85,9 +85,9 @@ bool SPUPassConfig::addPreEmitPass() { (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( "createTCESchedulerPass"); if (schedulerCreator != NULL) - PM->add(schedulerCreator("cellspu")); + addPass(schedulerCreator("cellspu")); //align instructions with nops/lnops for dual issue - PM->add(createSPUNopFillerPass(getSPUTargetMachine())); + addPass(createSPUNopFillerPass(getSPUTargetMachine())); return true; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index cd2ebcb508..c8e757becc 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -130,6 +130,7 @@ namespace { private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); + void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM); void printCallingConv(CallingConv::ID cc); void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); @@ -325,6 +326,26 @@ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { } } +void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) { + switch (TLM) { + case GlobalVariable::NotThreadLocal: + Out << "GlobalVariable::NotThreadLocal"; + break; + case GlobalVariable::GeneralDynamicTLSModel: + Out << "GlobalVariable::GeneralDynamicTLSModel"; + break; + case GlobalVariable::LocalDynamicTLSModel: + Out << "GlobalVariable::LocalDynamicTLSModel"; + break; + case GlobalVariable::InitialExecTLSModel: + Out << "GlobalVariable::InitialExecTLSModel"; + break; + case GlobalVariable::LocalExecTLSModel: + Out << "GlobalVariable::LocalExecTLSModel"; + break; + } +} + // printEscapedString - Print each character of the specified string, escaping // it if it is not printable or if it is an escape char. 
void CppWriter::printEscapedString(const std::string &Str) { @@ -996,7 +1017,9 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) { } if (GV->isThreadLocal()) { printCppName(GV); - Out << "->setThreadLocal(true);"; + Out << "->setThreadLocalMode("; + printThreadLocalMode(GV->getThreadLocalMode()); + Out << ");"; nl(Out); } if (is_inline) { @@ -2078,7 +2101,9 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 92bca6c3c7..9cbe7981a9 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -31,7 +31,9 @@ struct CPPTargetMachine : public TargetMachine { virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify); + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 3db17484b0..1f2d8accbb 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -32,6 +32,8 @@ add_llvm_target(HexagonCodeGen HexagonNewValueJump.cpp ) +add_dependencies(LLVMHexagonCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 0dc243f2b8..5fa4740f2a 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -133,7 +133,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const 
MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS); case 'c': // Don't print "$" before a global var name or constant. // Hexagon never has a prefix. printOperand(MI, OpNo, OS); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 7de27f74e2..a7b291ff2a 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -102,47 +102,47 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { - PM->add(createHexagonRemoveExtendOps(getHexagonTargetMachine())); - PM->add(createHexagonISelDag(getHexagonTargetMachine())); - PM->add(createHexagonPeephole()); + addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine())); + addPass(createHexagonPeephole()); return false; } bool HexagonPassConfig::addPreRegAlloc() { if (!DisableHardwareLoops) { - PM->add(createHexagonHardwareLoops()); + addPass(createHexagonHardwareLoops()); } return false; } bool HexagonPassConfig::addPostRegAlloc() { - PM->add(createHexagonCFGOptimizer(getHexagonTargetMachine())); + addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); return true; } bool HexagonPassConfig::addPreSched2() { - addPass(IfConverterID); + addPass(&IfConverterID); return true; } bool HexagonPassConfig::addPreEmitPass() { if (!DisableHardwareLoops) { - PM->add(createHexagonFixupHwLoops()); + addPass(createHexagonFixupHwLoops()); } - PM->add(createHexagonNewValueJump()); + addPass(createHexagonNewValueJump()); // Expand Spill code for predicate registers. 
- PM->add(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); + addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); // Split up TFRcondsets into conditional transfers. - PM->add(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); + addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); // Create Packets. - PM->add(createHexagonPacketizer()); + addPass(createHexagonPacketizer()); return false; } diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index bf1deef491..6c3e8b6447 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -30,6 +30,8 @@ add_llvm_target(MBlazeCodeGen MBlazeELFWriterInfo.cpp ) +add_dependencies(LLVMMBlazeCodeGen intrinsics_gen) + add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index b4edff0709..c2888553c5 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -50,7 +50,7 @@ def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", // MBlaze processors supported. //===----------------------------------------------------------------------===// -def : Processor<"mblaze", MBlazeGenericItineraries, []>; +def : Processor<"mblaze", NoItineraries, []>; def : Processor<"mblaze3", MBlazePipe3Itineraries, []>; def : Processor<"mblaze5", MBlazePipe5Itineraries, []>; diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 7269697ac2..e9f340f2f6 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -200,7 +200,13 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + if (ExtraCode[1] != 0) return true; // Unknown modifier. 
+ + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + } printOperand(MI, OpNo, O); return false; diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 4a3ae5fc14..cd5691ce64 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -40,11 +40,6 @@ def IIC_WDC : InstrItinClass; def IIC_Pseudo : InstrItinClass; //===----------------------------------------------------------------------===// -// MBlaze generic instruction itineraries. -//===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>; - -//===----------------------------------------------------------------------===// // MBlaze instruction itineraries for three stage pipeline. //===----------------------------------------------------------------------===// include "MBlazeSchedule3.td" diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 62393d0920..5f82f14203 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -68,7 +68,7 @@ TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) { // Install an instruction selector pass using // the ISelDag to gen MBlaze code. bool MBlazePassConfig::addInstSelector() { - PM->add(createMBlazeISelDag(getMBlazeTargetMachine())); + addPass(createMBlazeISelDag(getMBlazeTargetMachine())); return false; } @@ -76,6 +76,6 @@ bool MBlazePassConfig::addInstSelector() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. 
bool MBlazePassConfig::addPreEmitPass() { - PM->add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine())); + addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine())); return true; } diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index a8f9b52746..f9ecaed83a 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -23,6 +23,8 @@ add_llvm_target(MSP430CodeGen MSP430MCInstLower.cpp ) +add_dependencies(LLVMMSP430CodeGen intrinsics_gen) + add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 3acf96bb7d..817001d6ad 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -60,12 +60,12 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { bool MSP430PassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel())); + addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel())); return false; } bool MSP430PassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. - PM->add(createMSP430BranchSelectionPass()); + addPass(createMSP430BranchSelectionPass()); return false; } diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt index ac21c259fb..6c7343bbe5 100644 --- a/lib/Target/Mips/AsmParser/CMakeLists.txt +++ b/lib/Target/Mips/AsmParser/CMakeLists.txt @@ -1,6 +1,5 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMMipsAsmParser MipsAsmParser.cpp ) +add_dependencies(LLVMMipsAsmParser MipsCommonTableGen) diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index bccb5099ef..e9a228c331 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(MipsCodeGen MipsISelDAGToDAG.cpp MipsISelLowering.cpp MipsFrameLowering.cpp + MipsLongBranch.cpp MipsMCInstLower.cpp MipsMachineFunction.cpp MipsRegisterInfo.cpp @@ -31,6 +32,8 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) +add_dependencies(LLVMMipsCodeGen intrinsics_gen) + add_subdirectory(InstPrinter) add_subdirectory(Disassembler) add_subdirectory(TargetInfo) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index b8fe772544..9c5d31e21c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -37,6 +37,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_GPRel_4: case FK_Data_4: case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_GPOFF_HI: + case Mips::fixup_Mips_GPOFF_LO: + case Mips::fixup_Mips_GOT_PAGE: + case Mips::fixup_Mips_GOT_OFST: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. 
@@ -75,10 +79,8 @@ public: :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - // @LOCALMOD-BEGIN-UPSTREAM return createMipsELFObjectWriter(OS, MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); - // @LOCALMOD-END-UPSTREAM } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided @@ -119,7 +121,8 @@ public: CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); } - uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); + uint64_t Mask = ((uint64_t)(-1) >> + (64 - getFixupKindInfo(Kind).TargetSize)); CurVal |= Value & Mask; // Write out the fixed up bytes back to the code/data bits. @@ -160,7 +163,11 @@ public: { "fixup_Mips_TLSLDM", 0, 16, 0 }, { "fixup_Mips_DTPREL_HI", 0, 16, 0 }, { "fixup_Mips_DTPREL_LO", 0, 16, 0 }, - { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel } + { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_GPOFF_HI", 0, 16, 0 }, + { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, + { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, + { "fixup_Mips_GOT_OFST", 0, 16, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 2091bec500..9f9272886e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,7 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); virtual ~MipsELFObjectWriter(); @@ -52,9 +52,11 @@ namespace { }; } -MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI) +MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - 
/*HasRelocationAddend*/ false) {} + /*HasRelocationAddend*/ false, + /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} @@ -148,8 +150,23 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_Mips_PC16: Type = ELF::R_MIPS_PC16; break; + case Mips::fixup_Mips_GOT_PAGE: + Type = ELF::R_MIPS_GOT_PAGE; + break; + case Mips::fixup_Mips_GOT_OFST: + Type = ELF::R_MIPS_GOT_OFST; + break; + case Mips::fixup_Mips_GPOFF_HI: + Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); + Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); + Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type); + break; + case Mips::fixup_Mips_GPOFF_LO: + Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); + Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); + Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); + break; } - return Type; } @@ -184,10 +201,10 @@ static int CompareOffset(const RelEntry &R0, const RelEntry &R1) { void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) { - // Call the defualt function first. Relocations are sorted in descending + // Call the default function first. Relocations are sorted in descending // order of r_offset. MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); - + RelLs RelocLs; std::vector<RelLsIter> Unmatched; @@ -244,6 +261,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { - MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI); + MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, + (Is64Bit) ? 
true : false); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 9b76eda861..1f6000cc8c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -95,6 +95,18 @@ namespace Mips { // PC relative branch fixup resulting in - R_MIPS_PC16 fixup_Mips_Branch_PCRel, + // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 + fixup_Mips_GPOFF_HI, + + // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 + fixup_Mips_GPOFF_LO, + + // resulting in - R_MIPS_PAGE + fixup_Mips_GOT_PAGE, + + // resulting in - R_MIPS_GOT_OFST + fixup_Mips_GOT_OFST, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 3b0e59b87a..8ab2edeca0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -187,7 +187,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } else if (MO.isFPImm()) { return static_cast<unsigned>(APFloat(MO.getFPImm()) .bitcastToAPInt().getHiBits(32).getLimitedValue()); - } + } // MO must be an Expr. 
assert(MO.isExpr()); @@ -201,10 +201,27 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } assert (Kind == MCExpr::SymbolRef); - + Mips::Fixups FixupKind = Mips::Fixups(0); switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { + default: llvm_unreachable("Unknown fixup kind!"); + break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP : + llvm_unreachable("fixup kind VK_Mips_GOT_DISP not supported for direct object!"); + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI : + FixupKind = Mips::fixup_Mips_GPOFF_HI; + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_LO : + FixupKind = Mips::fixup_Mips_GPOFF_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE : + FixupKind = Mips::fixup_Mips_GOT_PAGE; + break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST : + FixupKind = Mips::fixup_Mips_GOT_OFST; + break; case MCSymbolRefExpr::VK_Mips_GPREL: FixupKind = Mips::fixup_Mips_GPREL16; break; @@ -244,8 +261,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TPREL_LO: FixupKind = Mips::fixup_Mips_TPREL_LO; break; - default: - break; } // switch Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index ed61b642fc..411030aaa1 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -34,6 +34,7 @@ namespace llvm { FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, JITCodeEmitter &JCE); diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index fc530939ed..2e0239377d 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -11,19 +11,29 @@ // //===----------------------------------------------------------------------===// +class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> { + 
let Predicates = [InMips16Mode]; +} + +def LI16E : FEXT_RI16<0b01101, (outs CPU16Regs:$rx), + (ins uimm16:$amt), + !strconcat("li", "\t$rx, $amt"), + [(set CPU16Regs:$rx, immZExt16:$amt )],IILoad>; + let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1, isBarrier=1, hasCtrlDep=1, rx=0, nd=0, l=0, ra=0 in -def RET16 : FRR16_JALRC < (outs), (ins CPURAReg:$target), - "jr\t$target", [(MipsRet CPURAReg:$target)], IIBranch>; +def RET16 : FRR16_JALRC + < (outs), (ins CPURAReg:$target), + "jr\t$target", [(MipsRet CPURAReg:$target)], IIBranch>; // As stack alignment is always done with addiu, we need a 16-bit immediate let Defs = [SP], Uses = [SP] in { def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt), - "!ADJCALLSTACKDOWN $amt", - [(callseq_start timm:$amt)]>; + "!ADJCALLSTACKDOWN $amt", + [(callseq_start timm:$amt)]>; def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2), - "!ADJCALLSTACKUP $amt1", - [(callseq_end timm:$amt1, timm:$amt2)]>; + "!ADJCALLSTACKUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -31,4 +41,7 @@ def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2), let isCall=1, hasDelaySlot=1, nd=0, l=0, ra=0 in def JumpLinkReg16: FRR16_JALRC<(outs), (ins CPU16Regs:$rs, variable_ops), - "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>; + "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>; + +// Small immediates +def : Mips16Pat<(i32 immZExt16:$in), (LI16E immZExt16:$in)>; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 7e129b8b8d..a5a3038827 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -230,47 +230,49 @@ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), // extended loads let Predicates = [NotN64, HasStandardEncoding] in { - def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; - def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; - def : Pat<(i64 
(extloadi16_a addr:$src)), (LH64 addr:$src)>; - def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>; - def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>; - def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>; - def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>; + def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; + def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; + def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>; + def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>; + def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>; + def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>; + def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>; - def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>; - def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>; - def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>; - def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>; - def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>; - def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>; + def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>; + def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>; + def : MipsPat<(zextloadi32_u addr:$a), + (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>; } // hi/lo relocs -def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; -def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; -def : Pat<(MipsHi 
tjumptable:$in), (LUi64 tjumptable:$in)>; -def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; -def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>; - -def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; -def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; -def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; -def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; -def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>; - -def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), - (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>; -def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)), - (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>; -def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), - (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; -def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), - (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; -def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)), - (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>; +def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; +def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; +def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>; +def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; +def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>; + +def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; +def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; +def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; +def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; +def : MipsPat<(MipsLo tglobaltlsaddr:$in), + (DADDiu ZERO_64, tglobaltlsaddr:$in)>; + +def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>; +def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)), + (DADDiu CPU64Regs:$hi, 
tblockaddress:$lo)>; +def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), + (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; +def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), + (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; +def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>; def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>; def : WrapperPat<tconstpool, DADDiu, CPU64Regs>; @@ -290,21 +292,22 @@ defm : SetgePats<CPU64Regs, SLT64, SLTu64>; defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>; // select MipsDynAlloc -def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, - Requires<[IsN64, HasStandardEncoding]>; +def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, + Requires<[IsN64, HasStandardEncoding]>; // truncate -def : Pat<(i32 (trunc CPU64Regs:$src)), - (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, - Requires<[IsN64, HasStandardEncoding]>; +def : MipsPat<(i32 (trunc CPU64Regs:$src)), + (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, + Requires<[IsN64, HasStandardEncoding]>; // 32-to-64-bit extension -def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; -def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>; -def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; +def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; +def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>; +def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; // Sign extend in register -def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>; +def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)), + (SLL64_64 CPU64Regs:$src)>; -// bswap pattern -def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; +// bswap MipsPattern +def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index b09c51179a..7167190f21 
100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -18,12 +18,12 @@ #include "MipsInstrInfo.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/BasicBlock.h" +#include "llvm/DebugInfo.h" +#include "llvm/Instructions.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -58,9 +58,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - MCInst TmpInst0; - MCInstLowering.Lower(MI, TmpInst0); - OutStreamer.EmitInstruction(TmpInst0); + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + do { + MCInst TmpInst0; + MCInstLowering.Lower(I++, TmpInst0); + OutStreamer.EmitInstruction(TmpInst0); + } while ((I != E) && I->isInsideBundle()); } //===----------------------------------------------------------------------===// @@ -236,15 +241,6 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { if (MipsFI->getEmitNOAT()) OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); } - - if ((MF->getTarget().getRelocationModel() == Reloc::PIC_) && - Subtarget->isABI_O32() && MipsFI->globalBaseRegSet()) { - SmallVector<MCInst, 4> MCInsts; - MCInstLowering.LowerSETGP01(MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); - I != MCInsts.end(); ++I) - OutStreamer.EmitInstruction(*I); - } } /// EmitFunctionBodyEnd - Targets can override this to emit stuff after @@ -316,7 +312,8 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (ExtraCode[0]) { default: - return true; // Unknown modifier. 
+ // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O); case 'X': // hex const int if ((MO.getType()) != MachineOperand::MO_Immediate) return true; @@ -337,6 +334,17 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return true; O << MO.getImm() - 1; return false; + case 'z': { + // $0 if zero, regular printing otherwise + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + int64_t Val = MO.getImm(); + if (Val) + O << Val; + else + O << "$0"; + return false; + } } } @@ -349,11 +357,12 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")"; + return false; } @@ -401,7 +410,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_BlockAddress: { - MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress()); + MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); O << BA->getName(); break; } @@ -462,7 +471,7 @@ printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { void MipsAsmPrinter:: printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier) { - const MachineOperand& MO = MI->getOperand(opNum); + const MachineOperand &MO = MI->getOperand(opNum); O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 4b7e1d3766..8aadefdcd1 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -145,6 +145,58 @@ def RetCC_MipsEABI : CallingConv<[ ]>; 
//===----------------------------------------------------------------------===// +// Mips FastCC Calling Convention +//===----------------------------------------------------------------------===// +def CC_MipsO32_FastCC : CallingConv<[ + // f64 arguments are passed in double-precision floating pointer registers. + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>, + + // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned. + CCIfType<[f64], CCAssignToStack<8, 8>> +]>; + +def CC_MipsN_FastCC : CallingConv<[ + // Integer arguments are passed in integer registers. + CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64, + T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, + T8_64, V1_64]>>, + + // f64 arguments are passed in double-precision floating pointer registers. + CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64, + D6_64, D7_64, D8_64, D9_64, D10_64, D11_64, + D12_64, D13_64, D14_64, D15_64, D16_64, D17_64, + D18_64, D19_64]>>, + + // Stack parameter slots for i64 and f64 are 64-bit doublewords and + // 8-byte aligned. + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def CC_Mips_FastCC : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // Integer arguments are passed in integer registers. All scratch registers, + // except for AT, V0 and T9, are available to be used as argument registers. + CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, + T7, T8, V1]>>, + + // f32 arguments are passed in single-precision floating pointer registers. + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, + F11, F12, F13, F14, F15, F16, F17, F18, F19]>>, + + // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned. 
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>, + CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>, + CCDelegateTo<CC_MipsN_FastCC> +]>; + +//===----------------------------------------------------------------------===// // Mips Calling Convention Dispatch //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 7d819026da..c0e76399fb 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -145,8 +145,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB){ MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E; ++I) emitInstruction(*I); } } while (MCE.finishFunction(MF)); diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 8b1215ab1e..b12b1f2b5a 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -61,47 +61,54 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC, Instruction MOVZInst, Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp, Instruction SLTiuOp> { - def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>; - def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>; - def : Pat<(select (i32 
(setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>; - def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>; + def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>; + def : MipsPat< + (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>; + def : MipsPat< + (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>; + def : MipsPat< + (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>; + def : MipsPat< + (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>; + def : MipsPat< + (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>; } multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC, Instruction MOVZInst, Instruction XOROp> { - def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>; + def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>; + def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>; } multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC, Instruction MOVZInst, Instruction XORiOp> { - def : Pat<(select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F), + def : MipsPat< + (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F), (MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>; } 
multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst, Instruction XOROp> { - def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : Pat<(select CRC:$cond, DRC:$T, DRC:$F), - (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>; - def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F), - (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>; + def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>; + def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F), + (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>; + def : MipsPat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F), + (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>; } // Instantiation of instructions. def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">; -let Predicates = [HasMips64, HasStandardEncoding],DecoderNamespace = "Mips64" in { +let Predicates = [HasMips64, HasStandardEncoding], + DecoderNamespace = "Mips64" in { def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">; def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> { let isCodeGenOnly = 1; @@ -139,7 +146,8 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in { def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">; def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">; } -let Predicates = [IsFP64bit, HasStandardEncoding],DecoderNamespace = "Mips64" in { +let Predicates = [IsFP64bit, HasStandardEncoding], + DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">; def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> { let isCodeGenOnly = 1; diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index a8c4b05ecd..2bba8a3802 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -45,10 +45,12 @@ static cl::opt<bool> 
SkipDelaySlotFiller( namespace { struct Filler : public MachineFunctionPass { + typedef MachineBasicBlock::instr_iterator InstrIter; + typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter; TargetMachine &TM; const TargetInstrInfo *TII; - MachineBasicBlock::iterator LastFiller; + InstrIter LastFiller; static char ID; Filler(TargetMachine &tm) @@ -71,27 +73,27 @@ namespace { } bool isDelayFiller(MachineBasicBlock &MBB, - MachineBasicBlock::iterator candidate); + InstrIter candidate); - void insertCallUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegDefs, - SmallSet<unsigned, 32>& RegUses); + void insertCallUses(InstrIter MI, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses); - void insertDefsUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegDefs, - SmallSet<unsigned, 32>& RegUses); + void insertDefsUses(InstrIter MI, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses); - bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, + bool IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg); - bool delayHasHazard(MachineBasicBlock::iterator candidate, + bool delayHasHazard(InstrIter candidate, bool &sawLoad, bool &sawStore, SmallSet<unsigned, 32> &RegDefs, SmallSet<unsigned, 32> &RegUses); bool - findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot, - MachineBasicBlock::iterator &Filler); + findDelayInstr(MachineBasicBlock &MBB, InstrIter slot, + InstrIter &Filler); }; @@ -103,14 +105,14 @@ namespace { bool Filler:: runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - LastFiller = MBB.end(); + LastFiller = MBB.instr_end(); - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) if (I->hasDelaySlot()) { ++FilledSlots; Changed = true; - MachineBasicBlock::iterator D; + InstrIter D; if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); 
@@ -121,6 +123,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { // Record the filler instruction that filled the delay slot. // The instruction after it will be visited in the next iteration. LastFiller = ++I; + + // Set InsideBundle bit so that the machine verifier doesn't expect this + // instruction to be a terminator. + LastFiller->setIsInsideBundle(); } return Changed; @@ -133,8 +139,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { } bool Filler::findDelayInstr(MachineBasicBlock &MBB, - MachineBasicBlock::iterator slot, - MachineBasicBlock::iterator &Filler) { + InstrIter slot, + InstrIter &Filler) { SmallSet<unsigned, 32> RegDefs; SmallSet<unsigned, 32> RegUses; @@ -143,13 +149,13 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, bool sawLoad = false; bool sawStore = false; - for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) { + for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) { // skip debug value if (I->isDebugValue()) continue; // Convert to forward iterator. - MachineBasicBlock::iterator FI(llvm::next(I).base()); + InstrIter FI(llvm::next(I).base()); if (I->hasUnmodeledSideEffects() || I->isInlineAsm() @@ -175,7 +181,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, return false; } -bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, +bool Filler::delayHasHazard(InstrIter candidate, bool &sawLoad, bool &sawStore, SmallSet<unsigned, 32> &RegDefs, SmallSet<unsigned, 32> &RegUses) { @@ -223,9 +229,9 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, } // Insert Defs and Uses of MI into the sets RegDefs and RegUses. -void Filler::insertDefsUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegDefs, - SmallSet<unsigned, 32>& RegUses) { +void Filler::insertDefsUses(InstrIter MI, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses) { // If MI is a call or return, just examine the explicit non-variadic operands. 
MCInstrDesc MCID = MI->getDesc(); unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() : @@ -250,7 +256,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, } //returns true if the Reg or its alias is in the RegSet. -bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { +bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) { // Check Reg and all aliased Registers. for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); AI.isValid(); ++AI) diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 27609c13ea..5afd2fc576 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -94,38 +94,6 @@ bool MipsFrameLowering::targetHandlesStackFrameRounding() const { return true; } -// Build an instruction sequence to load an immediate that is too large to fit -// in 16-bit and add the result to Reg. -static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, - const MipsInstrInfo &TII, MachineBasicBlock& MBB, - MachineBasicBlock::iterator II, DebugLoc DL) { - unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; - unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu; - unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate AnalyzeImm; - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq. 
- for (++Inst; Inst != Seq.end(); ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg); -} - void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -144,9 +112,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // First, compute final stack size. unsigned StackAlign = getStackAlignment(); - uint64_t StackSize = - RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign) + - RoundUpToAlignment(MFI->getStackSize(), StackAlign); + uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign); + + if (MipsFI->globalBaseRegSet()) + StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign; + else + StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign); // Update stack size MFI->setStackSize(StackSize); @@ -162,8 +133,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); else { // Expand immediate that doesn't fit in 16-bit. - MipsFI->setEmitNOAT(); - expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); + Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, + 0); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); } // emit ".cfi_def_cfa_offset StackSize" @@ -264,14 +239,20 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, // Adjust stack. if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize) BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); - else // Expand immediate that doesn't fit in 16-bit. 
- expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); + else { // Expand immediate that doesn't fit in 16-bit. + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); + Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, + 0); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); + } } void MipsFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { - MachineRegisterInfo& MRI = MF.getRegInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; // FIXME: remove this code if register allocator can correctly mark diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index b1ac73579f..f1c672ad86 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -117,15 +117,22 @@ private: void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - if (!MipsFI->globalBaseRegSet()) + if (((MF.getTarget().getRelocationModel() == Reloc::Static) || + Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo(); + const MipsInstrInfo *MII = TM.getInstrInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + int FI = 0; + + if (!Subtarget.inMips16Mode()) + FI= MipsFI->initGlobalRegFI(); const TargetRegisterClass *RC = Subtarget.isABI_N64() ? 
(const TargetRegisterClass*)&Mips::CPU64RegsRegClass : @@ -144,9 +151,12 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { const GlobalValue *FName = MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, + TargetRegInfo); return; } @@ -159,6 +169,8 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, + TargetRegInfo); return; } @@ -175,11 +187,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, + TargetRegInfo); return; } assert(Subtarget.isABI_O32()); + if (Subtarget.inMips16Mode()) + return; // no need to load GP. 
It can be calculated anywhere + + // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // @@ -201,6 +219,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); + MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -274,7 +293,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // If Parent is an unaligned f32 load or store, select a (base + index) // floating point load/store instruction (luxc1 or suxc1). - const LSBaseSDNode* LS = 0; + const LSBaseSDNode *LS = 0; if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { EVT VT = LS->getMemoryVT(); @@ -335,17 +354,18 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1); - if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || - isa<GlobalAddressSDNode>(LoVal.getOperand(0))) { + SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); - Offset = LoVal.getOperand(0); + Offset = Opnd0; return true; } } // If an indexed floating point load/store can be emitted, return false. 
- if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/) return false; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 04d4743b35..bc0a616e33 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -304,6 +304,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::ADD); setMinFunctionAlignment(HasMips64 ? 3 : 2); @@ -312,6 +313,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); + + maxStoresPerMemcpy = 16; } bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { @@ -340,17 +343,17 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { +static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. 
- SDNode* ADDCNode = ADDENode->getOperand(2).getNode(); + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); if (ADDCNode->getOpcode() != ISD::ADDC) return false; SDValue MultHi = ADDENode->getOperand(0); SDValue MultLo = ADDCNode->getOperand(0); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -413,17 +416,17 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { +static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. - SDNode* SUBCNode = SUBENode->getOperand(2).getNode(); + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); if (SUBCNode->getOpcode() != ISD::SUBC) return false; SDValue MultHi = SUBENode->getOperand(1); SDValue MultLo = SUBCNode->getOperand(1); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -478,9 +481,9 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { return true; } -static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -491,9 +494,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { 
+ const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -504,9 +507,9 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -581,7 +584,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) { // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. -static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { +static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; @@ -603,7 +606,7 @@ static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { } // Creates and returns a CMovFPT/F node. -static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, +static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { bool invert = InvertFPCondCode((Mips::CondCode) cast<ConstantSDNode>(Cond.getOperand(2)) @@ -613,9 +616,9 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -639,16 +642,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, const DebugLoc DL = N->getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); SDValue True = N->getOperand(1); - + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), 
SetCC.getOperand(0), SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); - + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } -static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match EXT. // $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, size, pos @@ -686,9 +689,9 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize, MVT::i32)); } -static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match INS. // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 @@ -740,6 +743,33 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0)); } +static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Add = N->getOperand(1); + + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue Lo = Add.getOperand(1); + + if ((Lo.getOpcode() != MipsISD::Lo) || + (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable)) + return SDValue(); + + EVT ValTy = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0), + Add.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -755,11 
+785,13 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) case ISD::UDIVREM: return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: - return PerformSELECTCombine(N, DAG, DCI, Subtarget); + return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DAG, DCI, Subtarget); + case ISD::ADD: + return PerformADDCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -832,7 +864,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { /* static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, DebugLoc dl, - const MipsSubtarget* Subtarget, + const MipsSubtarget *Subtarget, const TargetInstrInfo *TII, bool isFPCmp, unsigned Opc) { // There is no need to expand CMov instructions if target has @@ -2053,7 +2085,7 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // TODO: set SType according to the desired memory barrier behavior. SDValue -MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { +MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { unsigned SType = 0; DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), @@ -2061,7 +2093,7 @@ MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { } SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. 
unsigned SType = 0; @@ -2071,7 +2103,7 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, } SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); @@ -2093,15 +2125,15 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(0x20, MVT::i32)); - Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, DAG.getConstant(0, MVT::i32), - ShiftLeftLo); + Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, + DAG.getConstant(0, MVT::i32), ShiftLeftLo); Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, 2, DL); } -SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, +SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); @@ -2144,17 +2176,15 @@ SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { - SDValue BasePtr = LD->getBasePtr(), Ptr; + SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); - EVT BasePtrVT = BasePtr.getValueType(); + EVT BasePtrVT = Ptr.getValueType(); DebugLoc DL = LD->getDebugLoc(); SDVTList VTList = DAG.getVTList(VT, MVT::Other); if (Offset) - Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr, + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, BasePtrVT)); - else - Ptr = BasePtr; SDValue Ops[] = { Chain, Ptr, Src }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, @@ -2225,17 
+2255,14 @@ SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { - SDValue BasePtr = SD->getBasePtr(), Ptr, Value = SD->getValue(); - EVT MemVT = SD->getMemoryVT(); - EVT BasePtrVT = BasePtr.getValueType(); + SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); + EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); DebugLoc DL = SD->getDebugLoc(); SDVTList VTList = DAG.getVTList(MVT::Other); if (Offset) - Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr, + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, BasePtrVT)); - else - Ptr = BasePtr; SDValue Ops[] = { Chain, Value, Ptr }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, @@ -2472,10 +2499,10 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. static void WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, - SmallVector<SDValue, 8>& MemOpChains, int& LastFI, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, int &LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { unsigned LocMemOffset = VA.getLocMemOffset(); unsigned Offset = 0; @@ -2563,10 +2590,10 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, // Copy Mips64 byVal arg to registers and stack. 
void static PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, - SmallVector<SDValue, 8>& MemOpChains, int& LastFI, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, int &LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { unsigned ByValSize = Flags.getByValSize(); unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); @@ -2679,7 +2706,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); else if (HasMips64) AnalyzeMips64CallOperands(CCInfo, Outs); @@ -2704,7 +2733,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. - if (IsO32) + if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); @@ -2958,7 +2987,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Mips); @@ -2977,9 +3006,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue>& OutChains, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, const Argument *FuncArg) { unsigned LocMem = VA.getLocMemOffset(); unsigned FirstWord = LocMem / 4; @@ -3004,8 +3033,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, // Create frame object on stack and copy registers used for byval passing to it. 
static unsigned CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue>& OutChains, SelectionDAG &DAG, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, EVT PtrTy, const Argument *FuncArg) { @@ -3064,7 +3093,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -3250,7 +3281,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analize return values. 
CCInfo.AnalyzeReturn(Outs, RetCC_Mips); @@ -3398,6 +3429,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'r': if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) return std::make_pair(0U, &Mips::CPURegsRegClass); + if (VT == MVT::i64 && !HasMips64) + return std::make_pair(0U, &Mips::CPURegsRegClass); if (VT == MVT::i64 && HasMips64) return std::make_pair(0U, &Mips::CPU64RegsRegClass); // This will generate an error message @@ -3530,6 +3563,16 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } +EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const { + if (Subtarget->hasMips64()) + return MVT::i64; + + return MVT::i32; +} + bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 5342e37f28..b9975c550b 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -146,7 +146,8 @@ namespace llvm { SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, + bool IsSRA) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; @@ -202,6 +203,11 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isFPImmLegal - Returns true if the target can instruction select 
the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 29bd2dc494..c757b4c33f 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -54,10 +54,14 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in // Feature predicates. //===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, + AssemblerPredicate<"FeatureFP64Bit">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, + AssemblerPredicate<"!FeatureFP64Bit">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, + AssemblerPredicate<"FeatureSingleFloat">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, + AssemblerPredicate<"!FeatureSingleFloat">; // FP immediate patterns. 
def fpimm0 : PatLeaf<(fpimm), [{ @@ -428,46 +432,52 @@ def ExtractElementF64 : //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimm0), (MTC1 ZERO)>; -def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; +def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>; +def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; -def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; -def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; +def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; +def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; let Predicates = [NotFP64bit, HasStandardEncoding] in { - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; - def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; - def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D32_W (MTC1 CPURegs:$src))>; + def : MipsPat<(i32 (fp_to_sint AFGR64:$src)), + (MFC1 (TRUNC_W_D32 AFGR64:$src))>; + def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; } let Predicates = [IsFP64bit, HasStandardEncoding] in { - def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>; - def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; + def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>; + def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>; - def : Pat<(f32 (sint_to_fp CPU64Regs:$src)), - (CVT_S_L (DMTC1 CPU64Regs:$src))>; - def : Pat<(f64 (sint_to_fp CPU64Regs:$src)), - (CVT_D64_L (DMTC1 CPU64Regs:$src))>; + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D64_W 
(MTC1 CPURegs:$src))>; + def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)), + (CVT_S_L (DMTC1 CPU64Regs:$src))>; + def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)), + (CVT_D64_L (DMTC1 CPU64Regs:$src))>; - def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; - def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; - def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; + def : MipsPat<(i32 (fp_to_sint FGR64:$src)), + (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR64:$src)), + (DMFC1 (TRUNC_L_D64 FGR64:$src))>; - def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; + def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } // Patterns for unaligned floating point loads and stores. 
let Predicates = [HasMips32r2Or64, NotN64, NotNaCl/*@LOCALMOD*/] in { - def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : Pat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; + def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : MipsPat<(store_u FGR32:$src, CPURegs:$addr), + (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; } let Predicates = [IsN64, NotNaCl/*@LOCALMOD*/] in { - def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; + def : MipsPat<(f32 (load_u CPU64Regs:$addr)), + (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr), + (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index c01830d509..e4eefb9905 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" @@ -329,9 +330,9 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc) } } -static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, +static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, - SmallVectorImpl<MachineOperand>& Cond) { + SmallVectorImpl<MachineOperand> &Cond) { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -505,3 +506,58 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const return false; } +/// Return the number of bytes of code the specified instruction may be. 
+unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: + return MI->getDesc().getSize(); + case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + } +} + +unsigned +llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst) { + MipsAnalyzeImmediate AnalyzeImm; + unsigned Size = IsN64 ? 64 : 32; + unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; + unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + + if (LastInst && (Seq.size() == 1)) { + *LastInst = *Inst; + return 0; + } + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. Skip the last instruction if + // LastInst is not 0. 
+ for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInst) + *LastInst = *Inst; + + return Seq.size() - !!LastInst; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 51cc9afdfa..7a0065b634 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,6 +15,7 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -24,12 +25,6 @@ namespace llvm { -namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); -} - class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; @@ -109,8 +104,27 @@ public: /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + + /// Return the number of bytes of code the specified instruction may be. + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; }; +namespace Mips { + /// GetOppositeBranchOpc - Return the inverse of the specified + /// opcode, e.g. turning BEQ to BNE. + unsigned GetOppositeBranchOpc(unsigned Opc); + + /// Emit a series of instructions to load an immediate. All instructions + /// except for the last one are emitted. The function returns the number of + /// MachineInstrs generated. The opcode-immediate pair of the last + /// instruction is returned in LastInst, if it is not 0. 
+ unsigned + loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, MachineBasicBlock::iterator II, + DebugLoc DL, bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst); +} + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 60343293e8..5e388281b5 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -174,6 +174,10 @@ def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">, def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">; def NotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; +class MipsPat<dag pattern, dag result> : Pat<pattern, result> { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Instruction format superclass //===----------------------------------------------------------------------===// @@ -218,6 +222,7 @@ def mem : Operand<i32> { def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); + let EncoderMethod = "getMemEncoding"; } def mem_ea : Operand<i32> { @@ -563,6 +568,7 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, @@ -574,6 +580,7 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } // SetCC @@ -603,6 +610,7 @@ class JumpFJ<bits<6> op, string instr_asm>: let hasDelaySlot = 1; let Predicates = [RelocStatic, HasStandardEncoding]; let DecoderMethod = "DecodeJumpTarget"; + let Defs = [AT]; } // Unconditional branch @@ -616,6 +624,7 @@ class UncondBranch<bits<6> op, string instr_asm>: let isBarrier = 1; let hasDelaySlot = 1; let Predicates = [RelocPIC, HasStandardEncoding]; + 
let Defs = [AT]; } let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, @@ -1087,67 +1096,67 @@ def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// // Small immediates -def : Pat<(i32 immSExt16:$in), - (ADDiu ZERO, imm:$in)>; -def : Pat<(i32 immZExt16:$in), - (ORi ZERO, imm:$in)>; -def : Pat<(i32 immLow16Zero:$in), - (LUi (HI16 imm:$in))>; +def : MipsPat<(i32 immSExt16:$in), + (ADDiu ZERO, imm:$in)>; +def : MipsPat<(i32 immZExt16:$in), + (ORi ZERO, imm:$in)>; +def : MipsPat<(i32 immLow16Zero:$in), + (LUi (HI16 imm:$in))>; // Arbitrary immediates -def : Pat<(i32 imm:$imm), +def : MipsPat<(i32 imm:$imm), (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>; -// Carry patterns -def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs), - (SUBu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs), - (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$src, immSExt16:$imm), - (ADDiu CPURegs:$src, imm:$imm)>; +// Carry MipsPatterns +def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs), + (SUBu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), + (ADDu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), + (ADDiu CPURegs:$src, imm:$imm)>; // Call -def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), - (JAL tglobaladdr:$dst)>; -def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), - (JAL texternalsym:$dst)>; -//def : Pat<(MipsJmpLink CPURegs:$dst), -// (JALR CPURegs:$dst)>; +def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), + (JAL tglobaladdr:$dst)>; +def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), + (JAL texternalsym:$dst)>; +//def : MipsPat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs -def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; -def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; -def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; -def : Pat<(MipsHi tconstpool:$in), (LUi 
tconstpool:$in)>; -def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; - -def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; -def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; -def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; -def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; -def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; - -def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), - (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), - (ADDiu CPURegs:$hi, tblockaddress:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), - (ADDiu CPURegs:$hi, tjumptable:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), - (ADDiu CPURegs:$hi, tconstpool:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), - (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; +def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; +def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; + +def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; +def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; +def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; +def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; +def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; + +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), + (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), + (ADDiu CPURegs:$hi, tblockaddress:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), + (ADDiu CPURegs:$hi, tjumptable:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo 
tconstpool:$lo)), + (ADDiu CPURegs:$hi, tconstpool:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; // gp_rel relocs -def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), - (ADDiu CPURegs:$gp, tglobaladdr:$in)>; -def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), - (ADDiu CPURegs:$gp, tconstpool:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), + (ADDiu CPURegs:$gp, tglobaladdr:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), + (ADDiu CPURegs:$gp, tconstpool:$in)>; // wrapper_pic class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>: - Pat<(MipsWrapper RC:$gp, node:$in), - (ADDiuOp RC:$gp, node:$in)>; + MipsPat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; def : WrapperPat<tglobaladdr, ADDiu, CPURegs>; def : WrapperPat<tconstpool, ADDiu, CPURegs>; @@ -1157,58 +1166,58 @@ def : WrapperPat<tjumptable, ADDiu, CPURegs>; def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>; // Mips does not have "not", so we expand our way -def : Pat<(not CPURegs:$in), - (NOR CPURegs:$in, ZERO)>; +def : MipsPat<(not CPURegs:$in), + (NOR CPURegs:$in, ZERO)>; // extended loads let Predicates = [NotN64, HasStandardEncoding] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 
addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; } // peepholes let Predicates = [NotN64, HasStandardEncoding] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; + def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; } // brcond patterns multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp, Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp, Instruction SLTiuOp, Register ZEROReg> { -def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), - (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), - (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, 
immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond RC:$cond, bb:$dst), - (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; } defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; @@ -1216,39 +1225,39 @@ defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; // setcc patterns multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp, Instruction SLTuOp, Register ZEROReg> { - def : Pat<(seteq RC:$lhs, RC:$rhs), - (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setne RC:$lhs, RC:$rhs), - (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; + def : MipsPat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; } multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setle RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; - def 
: Pat<(setule RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setle RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setule RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; } multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setgt RC:$lhs, RC:$rhs), - (SLTOp RC:$rhs, RC:$lhs)>; - def : Pat<(setugt RC:$lhs, RC:$rhs), - (SLTuOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; } multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setge RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; } multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp, Instruction SLTiuOp> { - def : Pat<(setge RC:$lhs, immSExt16:$rhs), - (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, immSExt16:$rhs), - (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; } defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>; @@ -1258,10 +1267,10 @@ defm : SetgePats<CPURegs, SLT, SLTu>; defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // select MipsDynAlloc -def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; +def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; // bswap pattern -def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; +def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; //===----------------------------------------------------------------------===// // 
Floating Point Support diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 76ca3e1767..150bdbbe6f 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -154,8 +154,8 @@ TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { return Result; } -void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { +void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) { JCE.emitAlignment(4); void *Addr = (void*) (JCE.getCurrentPCValue()); if (!sys::Memory::setRangeWritable(Addr, 16)) @@ -193,7 +193,7 @@ void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { + unsigned NumRelocs, unsigned char *GOTBase) { for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index f4c4ae86d3..637a318660 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -45,8 +45,8 @@ class MipsJITInfo : public TargetJITInfo { /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); + virtual void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE); /// getLazyResolverFunction - Expose the lazy resolver to the JIT. virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); @@ -55,7 +55,7 @@ class MipsJITInfo : public TargetJITInfo { /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. 
virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); + unsigned NumRelocs, unsigned char *GOTBase); /// Initialize - Initialize internal stage for the function being JITted. void Initialize(const MachineFunction &MF, bool isPIC) { diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp new file mode 100644 index 0000000000..7be353f190 --- /dev/null +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -0,0 +1,418 @@ +//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands a branch or jump instruction into a long branch if its +// offset is too large to fit into its immediate field. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-long-branch" + +#include "Mips.h" +#include "MipsTargetMachine.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(LongBranches, "Number of long branches."); + +static cl::opt<bool> SkipLongBranch( + "skip-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Skip long branch pass."), + cl::Hidden); + +static cl::opt<bool> ForceLongBranch( + "force-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Expand all branches to long format."), + cl::Hidden); + +namespace { + typedef MachineBasicBlock::iterator Iter; + typedef 
MachineBasicBlock::reverse_iterator ReverseIter; + + struct MBBInfo { + uint64_t Size; + bool HasLongBranch; + MachineInstr *Br; + + MBBInfo() : Size(0), HasLongBranch(false), Br(0) {} + }; + + class MipsLongBranch : public MachineFunctionPass { + + public: + static char ID; + MipsLongBranch(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {} + + virtual const char *getPassName() const { + return "Mips Long Branch"; + } + + bool runOnMachineFunction(MachineFunction &F); + + private: + void splitMBB(MachineBasicBlock *MBB); + void initMBBInfo(); + int64_t computeOffset(const MachineInstr *Br); + bool offsetFitsIntoField(const MachineInstr *Br); + unsigned addLongBranch(MachineBasicBlock &MBB, Iter Pos, + MachineBasicBlock *Tgt, DebugLoc DL, bool Nop); + void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, + MachineBasicBlock *MBBOpnd); + void expandToLongBranch(MBBInfo &Info); + + const TargetMachine &TM; + const MipsInstrInfo *TII; + MachineFunction *MF; + SmallVector<MBBInfo, 16> MBBInfos; + }; + + char MipsLongBranch::ID = 0; +} // end of anonymous namespace + +/// createMipsLongBranchPass - Returns a pass that converts branches to long +/// branches. +FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) { + return new MipsLongBranch(tm); +} + +/// Iterate over list of Br's operands and search for a MachineBasicBlock +/// operand. +static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) { + for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) { + const MachineOperand &MO = Br.getOperand(I); + + if (MO.isMBB()) + return MO.getMBB(); + } + + assert(false && "This instruction does not have an MBB operand."); + return 0; +} + +// Traverse the list of instructions backwards until a non-debug instruction is +// found or it reaches E. 
+static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) { + for (; B != E; ++B) + if (!B->isDebugValue()) + return B; + + return E; +} + +// Split MBB if it has two direct jumps/branches. +void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) { + ReverseIter End = MBB->rend(); + ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End); + + // Return if MBB has no branch instructions. + if ((LastBr == End) || + (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch())) + return; + + ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End); + + // MBB has only one branch instruction if FirstBr is not a branch + // instruction. + if ((FirstBr == End) || + (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) + return; + + assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found."); + + // Create a new MBB. Move instructions in MBB to the newly created MBB. + MachineBasicBlock *NewMBB = + MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + + // Insert NewMBB and fix control flow. + MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); + NewMBB->transferSuccessors(MBB); + NewMBB->removeSuccessor(Tgt); + MBB->addSuccessor(NewMBB); + MBB->addSuccessor(Tgt); + MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + + NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end()); +} + +// Fill MBBInfos. +void MipsLongBranch::initMBBInfo() { + // Split the MBBs if they have two branches. Each basic block should have at + // most one branch after this loop is executed. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;) + splitMBB(I++); + + MF->RenumberBlocks(); + MBBInfos.clear(); + MBBInfos.resize(MF->size()); + + for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + + // Compute size of MBB. 
+ for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin(); + MI != MBB->instr_end(); ++MI) + MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI); + + // Search for MBB's branch instruction. + ReverseIter End = MBB->rend(); + ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End); + + if ((Br != End) && !Br->isIndirectBranch() && + (Br->isConditionalBranch() || Br->isUnconditionalBranch())) + MBBInfos[I].Br = (++Br).base(); + } +} + +// Compute offset of branch in number of bytes. +int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { + int64_t Offset = 0; + int ThisMBB = Br->getParent()->getNumber(); + int TargetMBB = getTargetMBB(*Br)->getNumber(); + + // Compute offset of a forward branch. + if (ThisMBB < TargetMBB) { + for (int N = ThisMBB + 1; N < TargetMBB; ++N) + Offset += MBBInfos[N].Size; + + return Offset + 4; + } + + // Compute offset of a backward branch. + for (int N = ThisMBB; N >= TargetMBB; --N) + Offset += MBBInfos[N].Size; + + return -Offset + 4; +} + +// Insert the following sequence: +// (pic or N64) +// lw $at, global_reg_slot +// lw $at, got($L1)($at) +// addiu $at, $at, lo($L1) +// jr $at +// noop +// (static and !N64) +// lui $at, hi($L1) +// addiu $at, $at, lo($L1) +// jr $at +// noop +unsigned MipsLongBranch::addLongBranch(MachineBasicBlock &MBB, Iter Pos, + MachineBasicBlock *Tgt, DebugLoc DL, + bool Nop) { + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI(); + bool N64 = (ABI == MipsSubtarget::N64); + unsigned NumInstrs; + + if (IsPIC || N64) { + bool HasMips64 = TM.getSubtarget<MipsSubtarget>().hasMips64(); + unsigned AT = N64 ? Mips::AT_64 : Mips::AT; + unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; + unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; + unsigned JR = N64 ? Mips::JR64 : Mips::JR; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ? 
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + const MipsRegisterInfo *MRI = + static_cast<const MipsRegisterInfo*>(TM.getRegisterInfo()); + unsigned SP = MRI->getFrameRegister(*MF); + unsigned GlobalRegFI = MF->getInfo<MipsFunctionInfo>()->getGlobalRegFI(); + int64_t Offset = MF->getFrameInfo()->getObjectOffset(GlobalRegFI); + + if (isInt<16>(Offset)) { + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(SP).addImm(Offset); + NumInstrs = 1; + } else { + unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + NumInstrs = Mips::loadImmediate(Offset, N64, *TII, MBB, Pos, DL, true, + &LastInst) + 2; + BuildMI(MBB, Pos, DL, TII->get(ADDu), AT).addReg(SP).addReg(AT); + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT) + .addImm(SignExtend64<16>(LastInst.ImmOpnd)); + } + + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT).addMBB(Tgt, GOTFlag); + BuildMI(MBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT).addMBB(Tgt, OFSTFlag); + BuildMI(MBB, Pos, DL, TII->get(JR)).addReg(Mips::AT, RegState::Kill); + NumInstrs += 3; + } else { + BuildMI(MBB, Pos, DL, TII->get(Mips::LUi), Mips::AT) + .addMBB(Tgt, MipsII::MO_ABS_HI); + BuildMI(MBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) + .addReg(Mips::AT).addMBB(Tgt, MipsII::MO_ABS_LO); + BuildMI(MBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT, RegState::Kill); + NumInstrs = 3; + } + + if (Nop) { + BuildMI(MBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); + ++NumInstrs; + } + + return NumInstrs; +} + +// Replace Br with a branch which has the opposite condition code and a +// MachineBasicBlock operand MBBOpnd. 
+void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, + DebugLoc DL, MachineBasicBlock *MBBOpnd) { + unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode()); + const MCInstrDesc &NewDesc = TII->get(NewOpc); + + MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); + + for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) { + MachineOperand &MO = Br->getOperand(I); + + if (!MO.isReg()) { + assert(MO.isMBB() && "MBB operand expected."); + break; + } + + MIB.addReg(MO.getReg()); + } + + MIB.addMBB(MBBOpnd); + + Br->eraseFromParent(); +} + +// Expand branch instructions to long branches. +void MipsLongBranch::expandToLongBranch(MBBInfo &I) { + I.HasLongBranch = true; + + MachineBasicBlock *MBB = I.Br->getParent(), *Tgt = getTargetMBB(*I.Br); + DebugLoc DL = I.Br->getDebugLoc(); + + if (I.Br->isUnconditionalBranch()) { + // Unconditional branch before transformation: + // b $tgt + // delay-slot-instr + // + // after transformation: + // delay-slot-instr + // lw $at, global_reg_slot + // lw $at, %got($tgt)($at) + // addiu $at, $at, %lo($tgt) + // jr $at + // nop + I.Size += (addLongBranch(*MBB, llvm::next(Iter(I.Br)), Tgt, DL, true) + - 1) * 4; + + // Remove branch and clear InsideBundle bit of the next instruction. 
+ llvm::next(MachineBasicBlock::instr_iterator(I.Br)) + ->setIsInsideBundle(false); + I.Br->eraseFromParent(); + return; + } + + assert(I.Br->isConditionalBranch() && "Conditional branch expected."); + + // Conditional branch before transformation: + // b cc, $tgt + // delay-slot-instr + // FallThrough: + // + // after transformation: + // b !cc, FallThrough + // delay-slot-instr + // NewMBB: + // lw $at, global_reg_slot + // lw $at, %got($tgt)($at) + // addiu $at, $at, %lo($tgt) + // jr $at + // noop + // FallThrough: + + MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + MBB->removeSuccessor(Tgt); + MBB->addSuccessor(NewMBB); + NewMBB->addSuccessor(Tgt); + + I.Size += addLongBranch(*NewMBB, NewMBB->begin(), Tgt, DL, true) * 4; + replaceBranch(*MBB, I.Br, DL, *MBB->succ_begin()); +} + +static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { + MachineBasicBlock &MBB = F.front(); + MachineBasicBlock::iterator I = MBB.begin(); + DebugLoc DL = MBB.findDebugLoc(MBB.begin()); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0) + .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + MBB.removeLiveIn(Mips::V0); +} + +bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { + if ((TM.getRelocationModel() == Reloc::PIC_) && + TM.getSubtarget<MipsSubtarget>().isABI_O32() && + F.getInfo<MipsFunctionInfo>()->globalBaseRegSet()) + emitGPDisp(F, TII); + + if (SkipLongBranch) + return true; + + MF = &F; + initMBBInfo(); + + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + SmallVector<MBBInfo, 16>::iterator I, E = MBBInfos.end(); + bool EverMadeChange = false, MadeChange = true; + + while (MadeChange) { + MadeChange = false; + + for (I = MBBInfos.begin(); I != E; ++I) { + // Skip if this MBB doesn't have a branch or the branch has 
already been + // converted to a long branch. + if (!I->Br || I->HasLongBranch) + continue; + + if (!ForceLongBranch) { + int64_t Offset = computeOffset(I->Br); + + // Check if offset fits into 16-bit immediate field of branches. + if ((I->Br->isConditionalBranch() || IsPIC) && isInt<16>(Offset / 4)) + continue; + + // Check if offset fits into 26-bit immediate field of jumps (J). + if (I->Br->isUnconditionalBranch() && !IsPIC && isInt<26>(Offset / 4)) + continue; + } + + expandToLongBranch(*I); + ++LongBranches; + EverMadeChange = MadeChange = true; + } + } + + if (EverMadeChange) + MF->RenumberBlocks(); + + return true; +} diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 0475777eac..ac33619c34 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -29,7 +29,7 @@ using namespace llvm; MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter) : AsmPrinter(asmprinter) {} -void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) { +void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) { Mang = M; Ctx = C; } @@ -105,21 +105,23 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, assert(Offset > 0); const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); - return MCOperand::CreateExpr(AddExpr); + const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + return MCOperand::CreateExpr(Add); } -static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0, - const MCOperand& Opnd1, - const MCOperand& Opnd2 = MCOperand()) { +/* +static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0, + const MCOperand &Opnd1, + const MCOperand &Opnd2 = MCOperand()) { Inst.setOpcode(Opc); Inst.addOperand(Opnd0); Inst.addOperand(Opnd1); if (Opnd2.isValid()) Inst.addOperand(Opnd2); } +*/ -MCOperand 
MipsMCInstLower::LowerOperand(const MachineOperand& MO, +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, unsigned offset) const { MachineOperandType MOTy = MO.getType(); @@ -157,11 +159,6 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -// Create the following two instructions: -// "lui $2, %hi(_gp_disp)" -// "addiu $2, $2, %lo(_gp_disp)" -void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) { - MCOperand RegOpnd = MCOperand::CreateReg(Mips::V0); MCInst Instr4, Mask1, Mask2; // @LOCALMOD // @LOCALMOD-START MCOperand MaskReg = MCOperand::CreateReg(Mips::LoadStoreStackMaskReg); @@ -205,18 +202,4 @@ void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) { llvm_unreachable("unaligned instruction not sandboxed"); } } - // @LOCALMOD-END - StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); - const MCSymbolRefExpr *MCSym; - - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); - MCOperand SymHi = MCOperand::CreateExpr(MCSym); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); - MCOperand SymLo = MCOperand::CreateExpr(MCSym); - - MCInsts.resize(2); - - CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi); - CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo); -} + // @LOCALMOD-END
\ No newline at end of file diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 0f4944e423..314420a170 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -31,9 +31,8 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { MipsAsmPrinter &AsmPrinter; public: MipsMCInstLower(MipsAsmPrinter &asmprinter); - void Initialize(Mangler *mang, MCContext* C); + void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerSETGP01(SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 04cb3f22b2..b2232c6573 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,8 +14,11 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H +#include "MipsSubtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" #include <utility> namespace llvm { @@ -45,6 +48,7 @@ class MipsFunctionInfo : public MachineFunctionInfo { // OutArgFIRange: Range of indices of all frame objects created during call to // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; + int GlobalRegFI; mutable int DynAllocFI; // Frame index of dynamically allocated stack area. 
unsigned MaxCallFrameSize; @@ -54,7 +58,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), DynAllocFI(0), + OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0), MaxCallFrameSize(0), EmitNOAT(false) {} @@ -73,6 +77,24 @@ public: OutArgFIRange.second = LastFI; } + bool isGlobalRegFI(int FI) const { + return GlobalRegFI && (FI == GlobalRegFI); + } + + int getGlobalRegFI() const { + return GlobalRegFI; + } + + int initGlobalRegFI() { + const TargetMachine &TM = MF.getTarget(); + unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4; + int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment(); + uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment); + + GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true); + return GlobalRegFI; + } + // The first call to this function creates a frame object for dynamically // allocated stack area. 
int getDynAllocFI() const { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 203cd9031c..3572f7d4d4 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -16,9 +16,11 @@ #include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" #include "MipsSubtarget.h" #include "MipsMachineFunction.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Type.h" #include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" @@ -35,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/DebugInfo.h" #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" @@ -54,8 +55,7 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } /// Mips Callee Saved Registers const uint16_t* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const -{ +getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; else if (!Subtarget.hasMips64()) @@ -64,12 +64,11 @@ getCalleeSavedRegs(const MachineFunction *MF) const return CSR_N32_SaveList; assert(Subtarget.isABI_N64()); - return CSR_N64_SaveList; + return CSR_N64_SaveList; } const uint32_t* -MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const -{ +MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_RegMask; else if (!Subtarget.hasMips64()) @@ -78,7 +77,7 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const return CSR_N32_RegMask; assert(Subtarget.isABI_N64()); - return CSR_N64_RegMask; + return CSR_N64_RegMask; } BitVector MipsRegisterInfo:: @@ -212,7 +211,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // incoming argument, callee-saved register location or local variable. 
int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex)) + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || + MipsFI->isGlobalRegFI(FrameIndex)) Offset = spOffset; else Offset = spOffset + (int64_t)stackSize; @@ -226,37 +226,17 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, if (!MI.isDebugValue() && !isInt<16>(Offset)) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - MipsAnalyzeImmediate AnalyzeImm; - unsigned Size = Subtarget.isABI_N64() ? 64 : 32; - unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + MipsAnalyzeImmediate::Inst LastInst(0, 0); MipsFI->setEmitNOAT(); - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq except for the last one. 
- for (++Inst; Inst != Seq.end() - 1; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); FrameReg = ATReg; - Offset = SignExtend64<16>(Inst->ImmOpnd); + Offset = SignExtend64<16>(LastInst.ImmOpnd); } MI.getOperand(i).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 6d3f83f506..f320baed64 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,7 +42,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 835ac6d05b..c5d6bf9811 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -61,8 +61,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, bool MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { + TargetSubtargetInfo::AntiDepBreakMode &Mode, + RegClassVector &CriticalPathRCs) const { Mode = TargetSubtargetInfo::ANTIDEP_NONE; CriticalPathRCs.clear(); CriticalPathRCs.push_back(hasMips64() ? 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 8b67572348..78e80148f2 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -116,7 +116,7 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - PM->add(createMipsISelDag(getMipsTargetMachine())); + addPass(createMipsISelDag(getMipsTargetMachine())); return false; } @@ -124,12 +124,18 @@ bool MipsPassConfig::addInstSelector() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. bool MipsPassConfig::addPreEmitPass() { - PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine())); + MipsTargetMachine &TM = getMipsTargetMachine(); + addPass(createMipsDelaySlotFillerPass(TM)); + + // NOTE: long branch has not been implemented for mips16. + if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) + addPass(createMipsLongBranchPass(TM)); + // @LOCALMOD-START if (getMipsSubtarget().isTargetNaCl()) { // This pass does all the heavy sfi lifting. - PM->add(createMipsNaClRewritePass()); + addPass(createMipsNaClRewritePass()); } // @LOCALMOD-END diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 80c00e80f1..5cbf057416 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -69,9 +69,7 @@ namespace llvm { // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE); - + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); }; /// MipsebTargetMachine - Mips32 big endian target machine. 
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index a32a78ac83..7cb16b4dd8 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -27,6 +27,7 @@ set(NVPTXCodeGen_sources add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) +add_dependencies(LLVMNVPTXCodeGen intrinsics_gen) add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 696f459ce2..f2b96163f4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" #include "NVPTXTargetMachine.h" #include "NVPTXRegisterInfo.h" -#include "NVPTXAsmPrinter.h" +#include "NVPTXUtilities.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTXNumRegisters.h" -#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/GlobalVariable.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -36,17 +37,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/DerivedTypes.h" -#include "NVPTXUtilities.h" #include "llvm/Support/TimeValue.h" -#include <sstream> #include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Path.h" #include "llvm/Assembly/Writer.h" #include "cl_common_defines.h" - - +#include <sstream> using namespace llvm; @@ -1914,7 +1911,9 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. 
switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'r': break; } diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 84c7232236..56b237252d 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" +#include "NVPTXLowerAggrCopies.h" #include "llvm/Constants.h" -#include "llvm/Module.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "NVPTXLowerAggrCopies.h" #include "llvm/Target/TargetData.h" -#include "llvm/LLVMContext.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 826b1dd34b..433f415a87 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -120,11 +120,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } bool NVPTXPassConfig::addInstSelector() { - PM->add(createLowerAggrCopies()); - PM->add(createSplitBBatBarPass()); - PM->add(createAllocaHoisting()); - PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - PM->add(createVectorElementizePass(getNVPTXTargetMachine())); + addPass(createLowerAggrCopies()); + addPass(createSplitBBatBarPass()); + addPass(createAllocaHoisting()); + addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); + addPass(createVectorElementizePass(getNVPTXTargetMachine())); return false; } diff --git 
a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1d82e5c677..b3f9cace6b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -48,12 +48,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // bool DisableVerify, MCContext *&OutCtx); public: - //virtual bool addPassesToEmitFile(PassManagerBase &PM, - // formatted_raw_ostream &Out, - // CodeGenFileType FileType, - // CodeGenOpt::Level OptLevel, - // bool DisableVerify = true) ; - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 7204926526..192d18d664 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -29,6 +29,8 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) +add_dependencies(LLVMPowerPCCodeGen intrinsics_gen) + add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 61d23ce06a..d175e3e79e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { - assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!"); unsigned Code = MI->getOperand(OpNo).getImm(); + if (!Modifier) { + unsigned CCReg = MI->getOperand(OpNo+1).getReg(); + unsigned RegNo; + switch (CCReg) { + default: llvm_unreachable("Unknown CR register"); + case PPC::CR0: RegNo = 0; break; + case PPC::CR1: RegNo = 1; break; + case PPC::CR2: RegNo = 2; break; + case PPC::CR3: RegNo = 3; break; + case PPC::CR4: RegNo = 
4; break; + case PPC::CR5: RegNo = 5; break; + case PPC::CR6: RegNo = 6; break; + case PPC::CR7: RegNo = 7; break; + } + + // Print the CR bit number. The Code is ((BI << 5) | BO) for a + // BCC, but we must have the positive form here (BO == 12) + unsigned BI = Code >> 5; + assert((Code & 0xF) == 12 && + "BO in predicate bit must have the positive form"); + + unsigned Value = 4*RegNo + BI; + O << Value; + return; + } + if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { case PPC::PRED_ALWAYS: return; // Don't print anything for always. diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 73fd5342a1..8f1e211c3e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -42,7 +42,7 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier); + raw_ostream &O, const char *Modifier = 0); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 96c46451be..b7f1688436 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -50,6 +50,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", + "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; @@ -66,8 +68,10 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>; +def : Processor<"440", PPC440Itineraries, 
[Directive440, FeatureISEL, + FeatureBookE]>; +def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureBookE]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603]>; @@ -90,10 +94,11 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, Feature64Bit - /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, @@ -101,7 +106,7 @@ def : Processor<"pwr6", G5Itineraries, def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index fb90600211..f76b89c803 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -22,8 +22,8 @@ #include "PPCSubtarget.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Assembly/Writer.h" @@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. 
switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'c': // Don't print "$" before a global var name or constant. break; // PPC never has a prefix. case 'L': // Write second word of DImode reference. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5234da71a8..f50f9b5a33 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -32,6 +32,7 @@ #define DEBUG_TYPE "ctrloops" #include "PPC.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -82,13 +83,14 @@ namespace { /// getCanonicalInductionVariable - Check to see if the loop has a canonical /// induction variable. /// Should be defined in MachineLoop. Based upon version in class Loop. - MachineInstr *getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const; + void getCanonicalInductionVariable(MachineLoop *L, + SmallVector<MachineInstr *, 4> &IVars, + SmallVector<MachineInstr *, 4> &IOps) const; /// getTripCount - Return a loop-invariant LLVM register indicating the /// number of times the loop will be executed. If the trip-count cannot /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L, bool &WordCmp, + CountValue *getTripCount(MachineLoop *L, SmallVector<MachineInstr *, 2> &OldInsts) const; /// isInductionOperation - Return true if the instruction matches the @@ -175,12 +177,12 @@ namespace { /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. 
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &WordCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPLWI) { - WordCmp = true; +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { + if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; return true; - } else if (MI->getOpcode() == PPC::CMPDI || MI->getOpcode() == PPC::CMPLDI) { - WordCmp = false; + } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + SignedCmp = false; return true; } @@ -227,26 +229,27 @@ bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { /// the machine. /// This method assumes that the IndVarSimplify pass has been run by 'opt'. /// -MachineInstr -*PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const { +void +PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, + SmallVector<MachineInstr *, 4> &IVars, + SmallVector<MachineInstr *, 4> &IOps) const { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return 0; // dead loop + if (PI == TopMBB->pred_end()) return; // dead loop MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + if (PI != TopMBB->pred_end()) return; // multiple backedges? // make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) - return 0; + return; std::swap(Incoming, Backedge); } else if (!L->contains(Backedge)) - return 0; + return; // Loop over all of the PHI nodes, looking for a canonical induction variable: // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". @@ -263,13 +266,13 @@ MachineInstr // Check if the definition is an induction operation. 
MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); if (isInductionOperation(DI, DefReg)) { - IOp = DI; - return MPhi; + IOps.push_back(DI); + IVars.push_back(MPhi); } } } } - return 0; + return; } /// getTripCount - Return a loop-invariant LLVM value indicating the @@ -283,66 +286,100 @@ MachineInstr /// /// Based upon getTripCount in LoopInfo. /// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, +CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, SmallVector<MachineInstr *, 2> &OldInsts) const { + MachineBasicBlock *LastMBB = L->getExitingBlock(); + // Don't generate a CTR loop if the loop has more than one exit. + if (LastMBB == 0) + return 0; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI->getOpcode() != PPC::BCC) + return 0; + + // We need to make sure that this compare is defining the condition + // register actually used by the terminating branch. + + unsigned PredReg = LastI->getOperand(1).getReg(); + DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); + + unsigned PredCond = LastI->getOperand(0).getImm(); + if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) + return 0; + // Check that the loop has a induction variable. 
- MachineInstr *IOp; - MachineInstr *IV_Inst = getCanonicalInductionVariable(L, IOp); - if (IV_Inst == 0) return 0; - - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } + SmallVector<MachineInstr *, 4> IVars, IOps; + getCanonicalInductionVariable(L, IVars, IOps); + for (unsigned i = 0; i < IVars.size(); ++i) { + MachineInstr *IOp = IOps[i]; + MachineInstr *IV_Inst = IVars[i]; + + // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', + // if Imm is 0, get the count from the PHI opnd + // if Imm is -M, than M is the count + // Otherwise, Imm is the count + MachineOperand *IV_Opnd; + const MachineOperand *InitialValue; + if (!L->contains(IV_Inst->getOperand(2).getMBB())) { + InitialValue = &IV_Inst->getOperand(1); + IV_Opnd = &IV_Inst->getOperand(3); + } else { + InitialValue = &IV_Inst->getOperand(3); + IV_Opnd = &IV_Inst->getOperand(1); + } - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). 
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, WordCmp)) { - OldInsts.push_back(MI); - OldInsts.push_back(IOp); - - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - int64_t ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); - - if (ImmVal == 0) { - // Make sure the induction variable changes by one on each iteration. - if (iv_value != 1 && iv_value != -1) { - return 0; - } - return new CountValue(InitialValue->getReg(), iv_value > 0); - } else { + DEBUG(dbgs() << "Considering:\n"); + DEBUG(dbgs() << " induction operation: " << *IOp); + DEBUG(dbgs() << " induction variable: " << *IV_Inst); + DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); + + // Look for the cmp instruction to determine if we + // can get a useful trip count. The trip count can + // be either a register or an immediate. The location + // of the value depends upon the type (reg or imm). 
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + bool SignedCmp; + MachineInstr *MI = IV_Opnd->getParent(); + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + MI->getOperand(0).getReg() == PredReg) { + + OldInsts.push_back(MI); + OldInsts.push_back(IOp); + + DEBUG(dbgs() << " compare: " << *MI); + + const MachineOperand &MO = MI->getOperand(2); + assert(MO.isImm() && "IV Cmp Operand should be an immediate"); + + int64_t ImmVal; + if (SignedCmp) + ImmVal = (short) MO.getImm(); + else + ImmVal = MO.getImm(); + + const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); + assert(L->contains(IV_DefInstr->getParent()) && + "IV definition should occurs in loop"); + int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); + assert(InitialValue->isReg() && "Expecting register for init value"); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); - + unsigned InitialValueReg = InitialValue->getReg(); + + const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + // Here we need to look for an immediate load (an li or lis/ori pair). 
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = DefInstr->getOperand(2).getImm(); + int64_t start = (short) DefInstr->getOperand(2).getImm(); const MachineInstr *DefInstr2 = MRI->getVRegDef(DefInstr->getOperand(0).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { - start |= DefInstr2->getOperand(1).getImm() << 16; + DEBUG(dbgs() << " initial constant: " << *DefInstr); + DEBUG(dbgs() << " initial constant: " << *DefInstr2); + start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; + int64_t count = ImmVal - start; if ((count % iv_value) != 0) { return 0; @@ -351,12 +388,23 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { - int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); + DEBUG(dbgs() << " initial constant: " << *DefInstr); + + int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } return new CountValue(count/iv_value); + } else if (iv_value == 1 || iv_value == -1) { + // We can't determine a constant starting value. + if (ImmVal == 0) { + return new CountValue(InitialValueReg, iv_value > 0); + } + // FIXME: handle non-zero end value. } + // FIXME: handle non-unit increments (we might not want to introduce division + // but we can handle some 2^n cases with shifts). + } } } @@ -524,10 +572,9 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { return Changed; } - bool WordCmp; SmallVector<MachineInstr *, 2> OldInsts; // Are we able to determine the trip count for the loop? 
- CountValue *TripCount = getTripCount(L, WordCmp, OldInsts); + CountValue *TripCount = getTripCount(L, OldInsts); if (TripCount == 0) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; @@ -575,14 +622,21 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>(); bool isPPC64 = Subtarget.isPPC64(); + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; + unsigned CountReg; if (TripCount->isReg()) { // Create a copy of the loop count register. - const TargetRegisterClass *RC = + const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); + unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + (unsigned) PPC::EXTSW_32_64 : + (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, - TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); + TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); if (TripCount->isNeg()) { unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); @@ -590,26 +644,12 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), CountReg).addReg(CountReg1); } - - // On a 64-bit system, if the original comparison was only 32-bit, then - // mask out the higher-order part of the count. - if (isPPC64 && WordCmp) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(PPC::RLDICL), CountReg).addReg(CountReg1 - ).addImm(0).addImm(32); - } } else { assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); // Put the trip count in a register for transfer into the count register. 
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; int64_t CountImm = TripCount->getImm(); - if (TripCount->isNeg()) - CountImm = -CountImm; + assert(!TripCount->isNeg() && "Constant trip count must be positive"); CountReg = MF->getRegInfo().createVirtualRegister(RC); if (CountImm > 0xFFFF) { @@ -665,6 +705,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); // Conditional branch; just delete it. + DEBUG(dbgs() << "Removing old branch: " << *LastI); LastMBB->erase(LastI); delete TripCount; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index be172c2435..c24afa908d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -368,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) @@ -383,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } } else { // PPC64. 
@@ -401,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) .addReg(PPC::X0) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) @@ -416,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b777f9313c..a00f686adc 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -111,6 +111,23 @@ namespace { /// immediate field. Because preinc imms have already been validated, just /// accept it. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { + if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) { + Out = N; + return true; + } + + return false; + } + + /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc + /// index field. Because preinc imms have already been validated, just + /// accept it. 
+ bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { + if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; + Out = N; return true; } @@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - // FIXME: PPC64 return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); } else { - llvm_unreachable("R+R preindex loads not supported yet!"); + unsigned Opcode; + bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; + if (LD->getValueType(0) != MVT::i64) { + // Handle PPC32 integer and normal FP loads. + assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::f64: Opcode = PPC::LFDUX; break; + case MVT::f32: Opcode = PPC::LFSUX; break; + case MVT::i32: Opcode = PPC::LWZUX; break; + case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX; break; + } + } else { + assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); + assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && + "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::i64: Opcode = PPC::LDUX; break; + case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; + case MVT::i16: Opcode = isSExt ? 
PPC::LHAUX8 : PPC::LHZUX8; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX8; break; + } + } + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = { Offset, Base, Chain }; + return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), + PPCLowering.getPointerTy(), + MVT::Other, Ops, 3); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 964d5a0d94..13250b33ea 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -66,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { + const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); setPow2DivIsCheap(); @@ -75,7 +76,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4); + bool isPPC64 = Subtarget->isPPC64(); + setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. 
addRegisterClass(MVT::i32, &PPC::GPRCRegClass); @@ -132,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FMA , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FMA , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { + if (!Subtarget->hasFSQRT()) { setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); } @@ -228,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (Subtarget->isSVR4ABI()) { + if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); @@ -273,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { + if (Subtarget->has64BitSupport()) { // They also have instructions for converting between i64 and fp. 
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -292,7 +294,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } - if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) { + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or @@ -308,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { + if (Subtarget->hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -378,6 +380,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -391,7 +394,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + if (Subtarget->has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); @@ -400,7 +403,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); setExceptionSelectorRegister(PPC::X4); @@ -417,7 +420,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::BSWAP); // Darwin long double math library functions have $LDBL128 appended. - if (TM.getSubtarget<PPCSubtarget>().isDarwin()) { + if (Subtarget->isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); @@ -434,6 +437,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget->isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + setSupportJumpTables(false); + setInsertFencesForAtomic(true); setSchedulingPreference(Sched::Hybrid); @@ -1105,7 +1113,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - // TODO: Check reg+reg first. + if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } // LDU/STU use reg+imm*4, others use reg+imm. if (VT != MVT::i64) { @@ -4933,11 +4944,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_CC_F4 || - MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { + if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8)) { + unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? 
+ PPC::ISEL8 : PPC::ISEL; + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + // The SelectPred is ((BI << 5) | BO) for a BCC + unsigned BO = SelectPred & 0xF; + assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); + + unsigned TrueOpNo, FalseOpNo; + if (BO == 12) { + TrueOpNo = 2; + FalseOpNo = 3; + } else { + TrueOpNo = 3; + FalseOpNo = 2; + SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + } + + BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(TrueOpNo).getReg()) + .addReg(MI->getOperand(FalseOpNo).getReg()) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -5873,6 +5910,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than +/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to +/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd +/// is expanded to mul + add. 
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + return true; + default: + break; + } + + return false; +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref) return TargetLowering::getSchedulingPreference(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 973800b461..b0a013b4b4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -366,6 +366,12 @@ namespace llvm { bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT VT) const; + private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 562ae7da0b..a2bd55f533 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -497,6 +497,10 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; +def ISEL8 : AForm_1<31, 15, + (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; } // End FXU Operations. @@ -533,6 +537,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp NoEncode<"$ea_result">; // NO LWAU! 
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } // Zero extending loads. @@ -572,6 +586,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -607,6 +637,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "ldux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } def : Pat<(PPCload ixaddr:$src), @@ -680,10 +715,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, isPPC64; -let mayStore = 1 in -def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTD, - []>, isPPC64; + +def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins 
G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti32 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 6c0f3d3f06..b0b8423281 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. 
def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fsub V_immneg0, - (fsub (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB)))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, + (fneg VRRC:$vB))))]>; def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 28b3bc1596..47f09dca77 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -79,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( return new PPCScoreboardHazardRecognizer(II, DAG); } + +// Detect 32 -> 64-bit extensions where we may reuse the low sub-register. 
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: return false; + case PPC::EXTSW: + case PPC::EXTSW_32_64: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SubIdx = PPC::sub_32; + return true; + } +} + unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7d49aa129e..374213ea43 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -92,6 +92,9 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e4af8846df..9b390461d8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -323,7 +323,7 @@ def memri : Operand<iPTR> { } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc, ptr_rc); + let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; @@ -349,10 +349,10 @@ def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" /// This is just the offset part of iaddr, used for preinc. 
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; +def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -711,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lfd $rD, $addr", LdStLFD, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + + +// Indexed (r+r) Loads with Update (preinc). +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfsux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfdux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -822,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins 
GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; - -let mayStore = 1 in { -def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStStore, - []>; -} + +def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), + (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfsux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), + (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, 
@@ -1236,51 +1311,43 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg 
(fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, + (fneg F8RC:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, + (fneg F4RC:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1331,6 +1398,13 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. + def ISEL : AForm_1<31, 15, + (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; +} + +let PPC970_Unit = 1 in { // FXU Operations. // M-Form instructions. rotate and mask instructions. // let isCommutable = 1 in { @@ -1441,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS GPRC:$in, tblockaddress:$g)>; -// Fused negative multiply subtract, alternate pattern -def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), - (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, - Requires<[FPContractions]>; -def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), - (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, - Requires<[FPContractions]>; - // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index ecb8754cbc..ab8bf1f93a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -199,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } +bool +PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + switch (RC->getID()) { + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return true; + default: + return false; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -328,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // address of new allocated space. if (LP64) { if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. 
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(Reg, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -349,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, .addImm(maxCallFrameSize) .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); } else { - BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(Reg, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 78e17c6890..152c36d699 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -47,6 +47,8 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// requiresRegisterScavenging - We require a register scavenger. /// FIXME (64-bit): Should be inlined. 
bool requiresRegisterScavenging(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c085ba26dd..bb193ac3d9 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -38,6 +38,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasAltivec(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasISEL(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 7d9be55713..0207c83393 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -70,6 +70,7 @@ protected: bool HasAltivec; bool HasFSQRT; bool HasSTFIWX; + bool HasISEL; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -141,6 +142,7 @@ public: bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } bool hasMFOCRF() const { return HasMFOCRF; } + bool hasISEL() const { return HasISEL; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index d7a808855b..980511268a 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -98,31 +98,25 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { bool PPCPassConfig::addPreRegAlloc() { if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - PM->add(createPPCCTRLoops()); + addPass(createPPCCTRLoops()); return false; } bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createPPCISelDag(getPPCTargetMachine())); + addPass(createPPCISelDag(getPPCTargetMachine())); return false; } bool PPCPassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. 
- PM->add(createPPCBranchSelectionPass()); + addPass(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - Options.DisableJumpTables = true; - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? Subtarget.SetJITMode(); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 093255e6af..cbfa4cf35b 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -964,6 +964,12 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// +unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } +The & 15 part should be optimized away, it doesn't change the result. Currently +not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index ae4af0f442..efb10db4c0 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -23,5 +23,7 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) +add_dependencies(LLVMSparcCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index c14b3d4a00..25548625e7 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -187,7 +187,9 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. 
switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'r': break; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 77fd2af88d..9ee12ed7f5 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -60,7 +60,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { } bool SparcPassConfig::addInstSelector() { - PM->add(createSparcISelDag(getSparcTargetMachine())); + addPass(createSparcISelDag(getSparcTargetMachine())); return false; } @@ -68,8 +68,8 @@ bool SparcPassConfig::addInstSelector() { /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. bool SparcPassConfig::addPreEmitPass(){ - PM->add(createSparcFPMoverPass(getSparcTargetMachine())); - PM->add(createSparcDelaySlotFillerPass(getSparcTargetMachine())); + addPass(createSparcFPMoverPass(getSparcTargetMachine())); + addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index e785d330ae..ffc1d9f0d1 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/GlobalAlias.h" #include "llvm/GlobalValue.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Target/TargetMachine.h" @@ -90,26 +92,59 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +/// Get the IR-specified TLS model for Var. 
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { + switch (Var->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: + llvm_unreachable("getSelectedTLSModel for non-TLS variable"); + break; + case GlobalVariable::GeneralDynamicTLSModel: + return TLSModel::GeneralDynamic; + case GlobalVariable::LocalDynamicTLSModel: + return TLSModel::LocalDynamic; + case GlobalVariable::InitialExecTLSModel: + return TLSModel::InitialExec; + case GlobalVariable::LocalExecTLSModel: + return TLSModel::LocalExec; + } + llvm_unreachable("invalid TLS model"); +} + TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + const GlobalVariable *Var = cast<GlobalVariable>(GV); + + bool isLocal = Var->hasLocalLinkage(); + bool isDeclaration = Var->isDeclaration(); + bool isPIC = getRelocationModel() == Reloc::PIC_; + bool isPIE = Options.PositionIndependentExecutable; // FIXME: what should we do for protected and internal visibility? // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); + bool isHidden = Var->hasHiddenVisibility(); - if (getRelocationModel() == Reloc::PIC_ && - !ForceTLSNonPIC && // @LOCALMOD - !Options.PositionIndependentExecutable) { + TLSModel::Model Model; + if (isPIC && !isPIE && + !ForceTLSNonPIC) { // @LOCALMOD if (isLocal || isHidden) - return TLSModel::LocalDynamic; + Model = TLSModel::LocalDynamic; else - return TLSModel::GeneralDynamic; + Model = TLSModel::GeneralDynamic; } else { if (!isDeclaration || isHidden) - return TLSModel::LocalExec; + Model = TLSModel::LocalExec; else - return TLSModel::InitialExec; + Model = TLSModel::InitialExec; } + + // If the user specified a more specific model, use that. 
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var); + if (SelectedModel > Model) + return SelectedModel; + + return Model; } /// getOptLevel - Returns the optimization level: None, Less, @@ -143,4 +178,3 @@ void TargetMachine::setFunctionSections(bool V) { void TargetMachine::setDataSections(bool V) { DataSections = V; } - diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 08c732c388..417842b467 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -187,7 +187,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } virtual void print(raw_ostream &OS) const {} @@ -309,25 +309,25 @@ struct X86Operand : public MCParsedAsmOperand { } bool isMem() const { return Kind == Memory; } - bool isMem8() const { + bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); } - bool isMem16() const { + bool isMem16() const { return Kind == Memory && (!Mem.Size || Mem.Size == 16); } - bool isMem32() const { + bool isMem32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32); } - bool isMem64() const { + bool isMem64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64); } - bool isMem80() const { + bool isMem80() const { return Kind == Memory && (!Mem.Size || Mem.Size == 80); } - bool isMem128() const { + bool isMem128() const { return Kind == Memory && (!Mem.Size || Mem.Size == 128); } - bool isMem256() const { + bool isMem256() const { return Kind == Memory && (!Mem.Size || Mem.Size == 256); } @@ -356,26 +356,26 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem8Operands(MCInst &Inst, 
unsigned N) const { + addMemOperands(Inst, N); } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem16Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem80Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem128Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem256Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -467,7 +467,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? 
X86::RDI : X86::EDI; - return Op.isMem() && + return Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && @@ -611,7 +611,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); - + if (getLexer().is(AsmToken::Identifier)) { // Parse BaseReg if (ParseRegister(BaseReg, Start, End)) { @@ -668,7 +668,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, End = Parser.getTok().getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) return 0; } } @@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we have both a base register and an index register make sure they are // both 64-bit or 32-bit registers. + // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 
if (BaseReg != 0 && IndexReg != 0) { if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && IndexReg != X86::RIZ) { Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); return 0; } if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && IndexReg != X86::EIZ){ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); return 0; @@ -944,7 +947,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if (PatchedName.startswith("set") && PatchedName.endswith("b") && PatchedName != "setb" && PatchedName != "setnb") PatchedName = PatchedName.substr(0, Name.size()-1); - + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && @@ -1217,7 +1220,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } } - + // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { @@ -1520,7 +1523,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_Success: // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the - // individual transformations can chain off each other. + // individual transformations can chain off each other. 
while (processInstruction(Inst, Operands)) ; @@ -1558,12 +1561,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Otherwise, we assume that this may be an integer instruction, which comes // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; - + // Check for the various suffix matches. Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; - + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); Tmp[Base.size()] = Suffixes[1]; Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); @@ -1691,19 +1694,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { const MCExpr *Value; if (getParser().ParseExpression(Value)) return true; - + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - + if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); Parser.Lex(); } } - + Parser.Lex(); return false; } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 5b402da3ad..45fd42f205 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -53,6 +53,8 @@ endif() add_llvm_target(X86CodeGen ${sources}) +add_dependencies(LLVMX86CodeGen intrinsics_gen) + add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index b13a00620b..d58e36c803 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -498,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else { baseReg = MCOperand::CreateReg(0); } - + + // Check whether we are handling VSIB addressing mode for GATHER. 
+ // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and + // we should use SIB_INDEX_XMM4|YMM4 for VSIB. + // I don't see a way to get the correct IndexReg in readSIB: + // We can tell whether it is VSIB or SIB after instruction ID is decoded, + // but instruction ID may not be decoded yet when calling readSIB. + uint32_t Opcode = mcInst.getOpcode(); + bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || + Opcode == X86::VGATHERDPDYrm || + Opcode == X86::VGATHERQPDrm || + Opcode == X86::VGATHERDPSrm || + Opcode == X86::VGATHERQPSrm || + Opcode == X86::VPGATHERDQrm || + Opcode == X86::VPGATHERDQYrm || + Opcode == X86::VPGATHERQQrm || + Opcode == X86::VPGATHERDDrm || + Opcode == X86::VPGATHERQDrm); + bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || + Opcode == X86::VGATHERDPSYrm || + Opcode == X86::VGATHERQPSYrm || + Opcode == X86::VPGATHERQQYrm || + Opcode == X86::VPGATHERDDYrm || + Opcode == X86::VPGATHERQDYrm); + if (IndexIs128 || IndexIs256) { + unsigned IndexOffset = insn.sibIndex - + (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); + SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; + insn.sibIndex = (SIBIndex)(IndexBase + + (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); + } + if (insn.sibIndex != SIB_INDEX_NONE) { switch (insn.sibIndex) { default: @@ -509,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, indexReg = MCOperand::CreateReg(X86::x); break; EA_BASES_32BIT EA_BASES_64BIT + REGS_XMM + REGS_YMM #undef ENTRY } } else { diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index fae309b45d..e2caf6a2a8 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -310,11 +310,14 @@ typedef enum { * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. 
+ * Vector SIB: index can be XMM or YMM. */ typedef enum { SIB_INDEX_NONE, #define ENTRY(x) SIB_INDEX_##x, ALL_EA_BASES + REGS_XMM + REGS_YMM #undef ENTRY SIB_INDEX_max } SIBIndex; diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index c3f46ebda0..b0e5be3162 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -483,17 +483,17 @@ namespace X86II { // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // - static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { + inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - static inline bool hasImm(uint64_t TSFlags) { + inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. - static inline unsigned getSizeOfImm(uint64_t TSFlags) { + inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8: @@ -508,7 +508,7 @@ namespace X86II { /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. - static inline unsigned isImmPCRel(uint64_t TSFlags) { + inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8PCRel: @@ -531,7 +531,7 @@ namespace X86II { /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only /// counted as one operand. /// - static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { + inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: // FIXME: Remove this form. 
@@ -594,7 +594,7 @@ namespace X86II { /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or /// higher) register? e.g. r8, xmm8, xmm13, etc. - static inline bool isX86_64ExtendedReg(unsigned RegNo) { + inline bool isX86_64ExtendedReg(unsigned RegNo) { switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: @@ -616,7 +616,7 @@ namespace X86II { return false; } - static inline bool isX86_64NonExtLowByteReg(unsigned reg) { + inline bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 61e2fdcb62..7f7873acd1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + // Instruction format for 4VOp3: + // src1(ModR/M), MemAddr, src3(VEX_4V) + // CurOp points to start of the MemoryOperand, + // it skips TIED_TO operands if exist, then increments past src1. + // CurOp + X86::AddrNumOperands will point to src3. 
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); break; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ee66e7ce1c..599c8f8c6d 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -20,10 +20,10 @@ #include "X86TargetMachine.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "llvm/CallingConv.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Type.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -436,7 +436,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'a': // This is an address. Currently only 'i' and 'r' are expected. 
if (MO.isImm()) { O << MO.getImm(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index af9efbd906..e263e44f40 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -52,7 +52,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit()); + MMI.callsUnwindInit() || MMI.callsEHReturn()); } static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { @@ -652,7 +652,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -916,18 +915,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, UseLEA, TII, *RegInfo); - // If we need a base pointer, set it up here. It's whatever the value - // of the stack pointer is at this point. Any variable size objects - // will be allocated after this, so we can still use the base pointer - // to reference locals. - if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. - unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; - BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); - } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. 
MCSymbol *Label = MMI.getContext().CreateTempSymbol(); @@ -1184,16 +1171,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); - if (RegInfo->hasBasePointer(MF)) { - assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); - if (FI < 0) { - // Skip the saved EBP. - return Offset + RegInfo->getSlotSize(); - } else { - assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); - return Offset + StackSize; - } - } else if (RegInfo->needsStackRealignment(MF)) { + if (RegInfo->needsStackRealignment(MF)) { if (FI < 0) { // Skip the saved EBP. return Offset + RegInfo->getSlotSize(); @@ -1224,14 +1202,9 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); // We can't calculate offset from frame pointer if the stack is realigned, - // so enforce usage of stack/base pointer. The base pointer is used when we - // have dynamic allocas in addition to dynamic realignment. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else if (RegInfo->needsStackRealignment(MF)) - FrameReg = RegInfo->getStackRegister(); - else - FrameReg = RegInfo->getFrameRegister(MF); + // so enforce usage of stack pointer. + FrameReg = (RegInfo->needsStackRealignment(MF)) ? + RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF); return getFrameIndexOffset(MF, FI); } @@ -1368,10 +1341,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, "Slot for EBP register must be last in order to be found!"); (void)FrameIdx; } - - // Spill the BasePtr if it's used. 
- if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); } static bool diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c8ff1cf0d0..2871a790c6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -188,6 +188,7 @@ namespace { private: SDNode *Select(SDNode *N); + SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); @@ -2165,6 +2166,30 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } +/// SelectGather - Customized ISel for GATHER operations. +/// +SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { + // Operands of Gather: VSrc, Base, VIdx, VMask, Scale + SDValue Chain = Node->getOperand(0); + SDValue VSrc = Node->getOperand(2); + SDValue Base = Node->getOperand(3); + SDValue VIdx = Node->getOperand(4); + SDValue VMask = Node->getOperand(5); + ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); + if (!Scale) + return 0; + + // Memory Operands: Base, Scale, Index, Disp, Segment + SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); + SDValue Segment = CurDAG->getRegister(0, MVT::i32); + const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, + Disp, Segment, VMask, Chain}; + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + VSrc.getValueType(), MVT::Other, + Ops, array_lengthof(Ops)); + return ResNode; +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; @@ -2180,23 +2205,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: break; + case 
Intrinsic::x86_avx2_gather_d_pd: + case Intrinsic::x86_avx2_gather_d_pd_256: + case Intrinsic::x86_avx2_gather_q_pd: + case Intrinsic::x86_avx2_gather_q_pd_256: + case Intrinsic::x86_avx2_gather_d_ps: + case Intrinsic::x86_avx2_gather_d_ps_256: + case Intrinsic::x86_avx2_gather_q_ps: + case Intrinsic::x86_avx2_gather_q_ps_256: + case Intrinsic::x86_avx2_gather_d_q: + case Intrinsic::x86_avx2_gather_d_q_256: + case Intrinsic::x86_avx2_gather_q_q: + case Intrinsic::x86_avx2_gather_q_q_256: + case Intrinsic::x86_avx2_gather_d_d: + case Intrinsic::x86_avx2_gather_d_d_256: + case Intrinsic::x86_avx2_gather_q_d: + case Intrinsic::x86_avx2_gather_q_d_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; + case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; + case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; + case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; + case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; + case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; + case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; + case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; + case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; + case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; + case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; + case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; + case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; + case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; + case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; + case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; + } + SDNode *RetVal = SelectGather(Node, Opc); + if (RetVal) + 
return RetVal; + break; + } + } + break; + } case X86ISD::GlobalBaseReg: return getGlobalBaseReg(); + case X86ISD::ATOMOR64_DAG: - return SelectAtomic64(Node, X86::ATOMOR6432); case X86ISD::ATOMXOR64_DAG: - return SelectAtomic64(Node, X86::ATOMXOR6432); case X86ISD::ATOMADD64_DAG: - return SelectAtomic64(Node, X86::ATOMADD6432); case X86ISD::ATOMSUB64_DAG: - return SelectAtomic64(Node, X86::ATOMSUB6432); case X86ISD::ATOMNAND64_DAG: - return SelectAtomic64(Node, X86::ATOMNAND6432); case X86ISD::ATOMAND64_DAG: - return SelectAtomic64(Node, X86::ATOMAND6432); - case X86ISD::ATOMSWAP64_DAG: - return SelectAtomic64(Node, X86::ATOMSWAP6432); + case X86ISD::ATOMSWAP64_DAG: { + unsigned Opc; + switch (Opcode) { + default: llvm_unreachable("Impossible intrinsic"); + case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; + case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; + case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; + case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; + case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; + case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; + } + SDNode *RetVal = SelectAtomic64(Node, Opc); + if (RetVal) + return RetVal; + break; + } case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bf559c98dd..4197c35adb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -99,6 +99,10 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { + // Inserting UNDEF is Result + if (Vec.getOpcode() == ISD::UNDEF) + return Result; + EVT VT = Vec.getValueType(); assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); @@ -114,9 +118,8 @@ static SDValue 
Insert128BitVector(SDValue Result, SDValue Vec, * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, - VecIdx); - return Result; + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, + VecIdx); } /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 @@ -136,10 +139,13 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { if (Subtarget->isTargetEnvMacho()) { if (is64Bit) - return new X8664_MachoTargetObjectFile(); + return new X86_64MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); } + if (Subtarget->isTargetLinux()) + return new X86LinuxTargetObjectFile(); + // @LOCALMOD-BEGIN if (Subtarget->isTargetNaCl()) return new TargetLoweringObjectFileNaCl(); @@ -3536,6 +3542,52 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { return true; } +// +// Some special combinations that can be optimized. +// +static +SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + EVT VT = SVOp->getValueType(0); + DebugLoc dl = SVOp->getDebugLoc(); + + if (VT != MVT::v8i32 && VT != MVT::v8f32) + return SDValue(); + + ArrayRef<int> Mask = SVOp->getMask(); + + // These are the special masks that may be optimized. 
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14}; + static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15}; + bool MatchEvenMask = true; + bool MatchOddMask = true; + for (int i=0; i<8; ++i) { + if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i])) + MatchEvenMask = false; + if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) + MatchOddMask = false; + } + static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; + static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; + + const int *CompactionMask; + if (MatchEvenMask) + CompactionMask = CompactionMaskEven; + else if (MatchOddMask) + CompactionMask = CompactionMaskOdd; + else + return SDValue(); + + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); + + SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), + UndefNode, CompactionMask); + SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), + UndefNode, CompactionMask); + static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT, @@ -5041,8 +5093,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { SDValue Sc = Op.getOperand(0); if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && - Sc.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); + Sc.getOpcode() != ISD::BUILD_VECTOR) { + + if (!Subtarget->hasAVX2()) + return SDValue(); + + // Use the register form of the broadcast instruction available on AVX2. 
+ if (VT.is256BitVector()) + Sc = Extract128BitVector(Sc, 0, DAG, dl); + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); + } Ld = Sc.getOperand(0); ConstSplatVal = (Ld.getOpcode() == ISD::Constant || @@ -6022,6 +6082,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, /// which could not be matched by any known target speficic shuffle static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + + SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG); + if (NewOp.getNode()) + return NewOp; + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); @@ -7504,11 +7569,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model = getTargetMachine().getTLSModel(GV); // @LOCALMOD-START @@ -9995,7 +10055,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); @@ -10015,7 +10074,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - MF.getRegInfo().addLiveOut(StoreAddrReg); return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, @@ -16240,12 +16298,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // wrong class. 
This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can // find, ignoring the required type. - if (VT == MVT::f32) + + if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; - else if (VT == MVT::f64) + else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; else if (X86::VR128RegClass.hasType(VT)) Res.second = &X86::VR128RegClass; + else if (X86::VR256RegClass.hasType(VT)) + Res.second = &X86::VR256RegClass; } return Res; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index fa1d67644d..aaef4a466d 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -55,11 +55,11 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } - - + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); - + if (BaseType == X86AddressMode::RegBase) MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, false, false, false, 0, false)); @@ -67,16 +67,16 @@ struct X86AddressMode { assert(BaseType == X86AddressMode::FrameIndexBase); MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); } - + MO.push_back(MachineOperand::CreateImm(Scale)); MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, false, false, false, 0, false)); - + if (GV) MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); else MO.push_back(MachineOperand::CreateImm(Disp)); - + MO.push_back(MachineOperand::CreateReg(0, false, false, false, false, false, 0, false)); } @@ -122,7 +122,7 @@ static inline const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else { @@ -135,7 +135,7 @@ addFullAddress(const 
MachineInstrBuilder &MIB, MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else MIB.addImm(AM.Disp); - + return MIB.addReg(0); } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index cbec891d7e..bebe5f033c 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -367,6 +367,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. +// SSDI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index dabb181cce..cb926f63a4 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load", enum { // Select which memory operand is being unfolded. - // (stored in bits 0 - 7) + // (stored in bits 0 - 3) TB_INDEX_0 = 0, TB_INDEX_1 = 1, TB_INDEX_2 = 2, TB_INDEX_3 = 3, - TB_INDEX_MASK = 0xff, - - // Minimum alignment required for load/store. - // Used for RegOp->MemOp conversion. - // (stored in bits 8 - 15) - TB_ALIGN_SHIFT = 8, - TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, - TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, - TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, - TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT, + TB_INDEX_MASK = 0xf, // Do not insert the reverse map (MemOp -> RegOp) into the table. // This may be needed because there is a many -> one mapping. - TB_NO_REVERSE = 1 << 16, + TB_NO_REVERSE = 1 << 4, // Do not insert the forward map (RegOp -> MemOp) into the table. // This is needed for Native Client, which prohibits branch // instructions from using a memory operand. 
- TB_NO_FORWARD = 1 << 17, + TB_NO_FORWARD = 1 << 5, - TB_FOLDED_LOAD = 1 << 18, - TB_FOLDED_STORE = 1 << 19 + TB_FOLDED_LOAD = 1 << 6, + TB_FOLDED_STORE = 1 << 7, + + // Minimum alignment required for load/store. + // Used for RegOp->MemOp conversion. + // (stored in bits 8 - 15) + TB_ALIGN_SHIFT = 8, + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT }; struct X86OpTblEntry { uint16_t RegOp; uint16_t MemOp; - uint32_t Flags; + uint16_t Flags; }; X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) @@ -415,14 +415,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, + { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 }, + { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 }, { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, @@ -499,14 +495,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, - { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2DQrr, 
X86::Int_VCVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 }, { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, + { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 }, + { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 }, { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE }, { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, @@ -815,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, - { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, - { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 }, - { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, - { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 }, - { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, - { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 }, - { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, - { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 }, - { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, - { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 }, { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h 
index 5ae6b99e5a..4006dad684 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -145,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo { std::pair<unsigned, unsigned> > MemOp2RegOpTableType; MemOp2RegOpTableType MemOp2RegOpTable; - void AddTableEntry(RegOp2MemOpTableType &R2MTable, - MemOp2RegOpTableType &M2RTable, - unsigned RegOp, unsigned MemOp, unsigned Flags); + static void AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags); public: explicit X86InstrInfo(X86TargetMachine &tm); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 892115b77e..0edd10a355 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -333,6 +333,12 @@ def f128mem : X86MemOperand<"printf128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } def f256mem : X86MemOperand<"printf256mem">{ let ParserMatchClass = X86Mem256AsmOperand; } +def v128mem : X86MemOperand<"printf128mem"> { + let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); + let ParserMatchClass = X86Mem128AsmOperand; } +def v256mem : X86MemOperand<"printf256mem"> { + let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); + let ParserMatchClass = X86Mem256AsmOperand; } } // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 56542494b2..5319455dc5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1418,10 +1418,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d, OpndItins itins> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set 
DstRC:$dst, (OpNode SrcRC:$src))], itins.rr, d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], itins.rm, d>; } @@ -1622,7 +1622,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; -let Pattern = []<dag> in { +let Pattern = []<dag>, neverHasSideEffects = 1 in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; @@ -1630,14 +1630,16 @@ defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; } -let Pattern = []<dag> in { +let Pattern = []<dag>, neverHasSideEffects = 1 in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS; @@ -1646,8 +1648,8 @@ defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, - TB; /* PD SSE3 form is avaiable */ + SSEPackedSingle, SSE_CVT_PS>, TB, + Requires<[HasSSE2]>; } let Predicates = [HasAVX] in { @@ -1788,56 +1790,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, 
Requires<[HasSSE2]>; } -// Convert doubleword to packed single/double fp -// SSE2 instructions without OpSize prefix -def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, Requires<[HasSSE2]>; - -// SSE2 instructions with XS prefix -def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, Requires<[HasSSE2]>; -def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - 
(bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, Requires<[HasSSE2]>; - - // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], @@ -1858,51 +1810,63 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PS_RM>; -def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, - VEX; -def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>; - -// SSE2 packed instructions with XD prefix -def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, VEX, Requires<[HasAVX]>; -def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, VEX, Requires<[HasAVX]>; -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, Requires<[HasSSE2]>; 
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, Requires<[HasSSE2]>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (VCVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (VCVTPS2DQrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (CVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (CVTPS2DQrm addr:$src)>; +} + +// Convert Packed Double FP to Packed DW Integers +let Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. +def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTPD2DQrr VR128:$dst, VR128:$src)>; +def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", + (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; +} + +def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; +def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +let Predicates = 
[HasAVX] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (VCVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (VCVTPD2DQXrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (CVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (CVTPD2DQrm addr:$src)>; +} // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1914,7 +1878,7 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))], + (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1935,14 +1899,19 @@ def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))], + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_VCVTDQ2PSrr VR128:$src)>; + (VCVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_VCVTDQ2PSrm addr:$src)>; + (VCVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -1962,9 +1931,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; + 
(CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_CVTDQ2PSrm addr:$src)>; + (CVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (CVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (CVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>; @@ -1977,12 +1951,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -let isCodeGenOnly = 1 in -def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], @@ -1990,31 +1959,38 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
-def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + "cvttpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; + +let Predicates = [HasAVX] in { + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTTPD2DQYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2032,35 +2008,71 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; } + +let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, TB; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; +} + +let 
Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (VCVTPS2PDrr VR128:$src)>; +} -def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, Requires<[HasSSE2]>; +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (CVTPS2PDrr VR128:$src)>; +} + +// Convert Packed DW Integers to Packed Double FP +let Predicates = [HasAVX] in { +def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, 
$dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; + +// 128 bit register conversion intrinsics +let Predicates = [HasAVX] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (VCVTDQ2PDrr VR128:$src)>; + +let Predicates = [HasSSE2] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (CVTDQ2PDrr VR128:$src)>; + +// AVX 256-bit register conversion intrinsics +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), + (VCVTDQ2PDYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), + (VCVTPD2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTPD2DQYrm addr:$src)>; + + def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (VCVTDQ2PDYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed double to packed single // The assembler can recognize rr 256-bit instructions by seeing a ymm @@ -2069,25 +2081,24 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; -def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", + (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2psy\t{$src, $dst|$dst, 
$src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", + (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; + def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>; @@ -2096,64 +2107,60 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), IIC_SSE_CVT_PD_RM>; -def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; -def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), + (VCVTPD2PSXrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), + (CVTPD2PSrm addr:$src)>; +} // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // 
whenever possible to avoid declaring two versions of each one. -def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), - (VCVTDQ2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), - (VCVTDQ2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - -// Match fround and fextend for 128/256-bit conversions -def : Pat<(v4f32 (fround (v4f64 VR256:$src))), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), - (VCVTPS2PDYrm addr:$src)>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), + (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), + (VCVTDQ2PSYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), + (VCVTPD2PSYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), + (VCVTPS2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTPS2DQYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), + (VCVTPS2PDYrr VR128:$src)>; + 
def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), + (VCVTPS2PDYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTTPD2DQYrm addr:$src)>; + + // Match fround and fextend for 128/256-bit conversions + def : Pat<(v4f32 (fround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + + def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), + (VCVTPS2PDYrr VR128:$src)>; + def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + (VCVTPS2PDYrm addr:$src)>; +} //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions @@ -4889,80 +4896,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; //===---------------------------------------------------------------------===// -// SSE3 - Conversion Instructions -//===---------------------------------------------------------------------===// - -// Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX] in { -// The assembler can recognize rr 256-bit instructions by seeing a ymm -// register, but the same isn't true when using memory operands instead. -// Provide other assembly rr and rm forms to address this explicitly. 
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; - -// XMM only -def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; - -// YMM only -def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} - -def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; -def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; - -def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), - (VCVTTPD2DQYrm addr:$src)>; - -// Convert Packed DW Integers to Packed Double FP -let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -} - -def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, 
$src}", [], - IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; - -// AVX 256-bit register conversion intrinsics -def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - -def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (VCVTDQ2PDYrm addr:$src)>; - -//===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, @@ -7339,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in { int_x86_avx2_vbroadcast_ss_ps_256>; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; +def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, + int_x86_avx2_vbroadcast_sd_pd_256>; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, @@ -7751,6 +7684,31 @@ let Predicates = [HasAVX2] in { def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VPBROADCASTQYrm addr:$src)>; + def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBrr VR128:$src)>; + def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBYrr VR128:$src)>; + def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWrr VR128:$src)>; + def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWYrr 
VR128:$src)>; + def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDrr VR128:$src)>; + def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDYrr VR128:$src)>; + def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQYrr VR128:$src)>; + def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSrr VR128:$src)>; + def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSYrr VR128:$src)>; + def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), + (VBROADCASTSDYrr VR128:$src)>; + // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. let AddedComplexity = 20 in { @@ -7761,7 +7719,7 @@ let Predicates = [HasAVX2] in { (VBROADCASTSSYrr (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), - (VBROADCASTSDrr + (VBROADCASTSDYrr (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), @@ -7771,7 +7729,7 @@ let Predicates = [HasAVX2] in { (VBROADCASTSSYrr (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VBROADCASTSDrr + (VBROADCASTSDYrr (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>; } } @@ -8061,3 +8019,55 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; + +//===----------------------------------------------------------------------===// +// VGATHER - GATHER Operations +multiclass avx2_gather<bits<8> opc, string OpcodeStr, + RegisterClass RC256, 
X86MemOperand memop256, + Intrinsic IntGather128, Intrinsic IntGather256> { + def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, v128mem:$src2, VR128:$mask), + !strconcat(OpcodeStr, + "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), + []>, VEX_4VOp3; + def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst), + (ins RC256:$src1, memop256:$src2, RC256:$mask), + !strconcat(OpcodeStr, + "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), + []>, VEX_4VOp3, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", + VR256, v128mem, + int_x86_avx2_gather_d_pd, + int_x86_avx2_gather_d_pd_256>, VEX_W; + defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", + VR256, v256mem, + int_x86_avx2_gather_q_pd, + int_x86_avx2_gather_q_pd_256>, VEX_W; + defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", + VR256, v256mem, + int_x86_avx2_gather_d_ps, + int_x86_avx2_gather_d_ps_256>; + defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", + VR128, v256mem, + int_x86_avx2_gather_q_ps, + int_x86_avx2_gather_q_ps_256>; + defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", + VR256, v128mem, + int_x86_avx2_gather_d_q, + int_x86_avx2_gather_d_q_256>, VEX_W; + defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", + VR256, v256mem, + int_x86_avx2_gather_q_q, + int_x86_avx2_gather_q_q_256>, VEX_W; + defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", + VR256, v256mem, + int_x86_avx2_gather_d_d, + int_x86_avx2_gather_d_d_256>; + defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", + VR128, v256mem, + int_x86_avx2_gather_q_d, + int_x86_avx2_gather_q_d_256>; +} diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 6a8f0c8486..6d3548f093 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -17,17 +17,17 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, 
Requires<[In32BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1be4c3864a..ed086dd8ad 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,10 +50,6 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); -cl::opt<bool> -EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), - cl::desc("Enable use of a base pointer for complex stack frames")); - // @LOCALMOD-BEGIN extern cl::opt<bool> FlagUseZeroBasedSandbox; extern cl::opt<bool> FlagRestrictR15; @@ -77,12 +73,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; - BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; - BasePtr = X86::EBX; } } @@ -301,20 +295,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } - // Set the base-pointer register and its aliases as reserved if needed. 
- if (hasBasePointer(MF)) { - CallingConv::ID CC = MF.getFunction()->getCallingConv(); - const uint32_t* RegMask = getCallPreservedMask(CC); - if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) - report_fatal_error( - "Stack realignment in presence of dynamic allocas is not supported with" - "this calling convention."); - - Reserved.set(getBaseRegister()); - for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I) - Reserved.set(*I); - } - // Mark the segment registers as reserved. Reserved.set(X86::CS); Reserved.set(X86::SS); @@ -384,35 +364,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - if (!EnableBasePointer) - return false; - - // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. - if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) - return true; - - return false; -} - bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - if (!MF.getTarget().Options.RealignStack) - return false; - - // Stack realignment requires a frame pointer. If we already started - // register allocation with frame pointer elimination, it is too late now. - if (!MRI->canReserveReg(FramePtr)) - return false; - - // If base pointer is necessary. Check that it isn't too late to reserve it. 
- if (MFI->hasVarSizedObjects()) - return MRI->canReserveReg(BasePtr); - return true; + return (MF.getTarget().Options.RealignStack && + !MFI->hasVarSizedObjects()); } bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { @@ -422,6 +377,13 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); + // FIXME: Currently we don't support stack realignment for functions with + // variable-sized allocas. + // FIXME: It's more complicated than this... + if (0 && requiresRealignment && MFI->hasVarSizedObjects()) + report_fatal_error( + "Stack realignment in presence of dynamic allocas is not supported"); + // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); @@ -561,9 +523,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; - if (hasBasePointer(MF)) - BasePtr = getBaseRegister(); - else if (needsStackRealignment(MF)) + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else if (AfterFPPop) BasePtr = StackPtr; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 1bc32cbb78..ee69842b10 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -50,11 +50,6 @@ private: /// unsigned FramePtr; - /// BasePtr - X86 physical register used as a base ptr in complex stack - /// frames. I.e., when we need a 3rd base, not just SP and FP, due to - /// variable size stack objects. - unsigned BasePtr; - public: X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); @@ -111,8 +106,6 @@ public: /// register scavenger to determine what registers are free. 
BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; @@ -130,7 +123,6 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } - unsigned getBaseRegister() const { return BasePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 6d05a91a32..20acc2bab3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -150,44 +150,44 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { bool X86PassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel())); + addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses. if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - PM->add(createCleanupLocalDynamicTLSPass()); + addPass(createCleanupLocalDynamicTLSPass()); // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!getX86Subtarget().is64Bit()) - PM->add(createGlobalBaseRegPass()); + addPass(createGlobalBaseRegPass()); return false; } bool X86PassConfig::addPreRegAlloc() { - PM->add(createX86MaxStackAlignmentHeuristicPass()); + addPass(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } bool X86PassConfig::addPostRegAlloc() { - PM->add(createX86FloatingPointStackifierPass()); + addPass(createX86FloatingPointStackifierPass()); return true; // -print-machineinstr should print after this. 
} bool X86PassConfig::addPreEmitPass() { bool ShouldPrint = false; if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) { - PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + addPass(createExecutionDependencyFixPass(&X86::VR128RegClass)); ShouldPrint = true; } if (getX86Subtarget().hasAVX() && UseVZeroUpper) { - PM->add(createX86IssueVZeroUpperPass()); + addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } // @LOCALMOD-START if (getX86Subtarget().isTargetNaCl()) { - PM->add(createX86NaClRewritePass()); + addPass(createX86NaClRewritePass()); ShouldPrint = true; } // @LOCALMOD-END diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 32bfba96bb..4f39d68d40 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -10,17 +10,19 @@ #include "X86TargetObjectFile.h" #include "X86TargetMachine.h" #include "X86Subtarget.h" // @LOCALMOD +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" // @LOCALMOD +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/Mangler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" using namespace llvm; using namespace dwarf; -const MCExpr *X8664_MachoTargetObjectFile:: +const MCExpr *X86_64MachoTargetObjectFile:: getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { @@ -39,12 +41,18 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } -MCSymbol *X8664_MachoTargetObjectFile:: +MCSymbol *X86_64MachoTargetObjectFile:: getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return Mang->getSymbol(GV); } +void 
+X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + // @LOCALMOD-START // NOTE: this was largely lifted from // lib/Target/ARM/ARMTargetObjectFile.cpp diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 34c1234eae..5fac48e57a 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -16,9 +16,9 @@ namespace llvm { - /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin + /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin /// x86-64. - class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { + class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: virtual const MCExpr * getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, @@ -32,6 +32,12 @@ namespace llvm { MachineModuleInfo *MMI) const; }; + /// X86LinuxTargetObjectFile - This implementation is used for linux x86 + /// and x86-64. 
+ class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + // @LOCALMOD-BEGIN class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF { public: diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0d59572a0d..ca94f03a64 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -22,5 +22,7 @@ add_llvm_target(XCoreCodeGen XCoreSelectionDAGInfo.cpp ) +add_dependencies(LLVMXCoreCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 8906b2459e..c76866f47b 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -18,9 +18,9 @@ #include "XCoreSubtarget.h" #include "XCoreTargetMachine.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -260,7 +260,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { - printOperand(MI, OpNo, O); + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + if (ExtraCode[1] != 0) return true; // Unknown modifier. 
+ + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + } + +printOperand(MI, OpNo, O); return false; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index b25a08d25c..b2f0603776 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -796,7 +796,7 @@ def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size), def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), "mkmsk $dst, $size", - [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>; + [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>; def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 5afd5a1aff..11ec86b0fa 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -55,7 +55,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { } bool XCorePassConfig::addInstSelector() { - PM->add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); + addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); return false; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 58b3551cd7..3f6b1de614 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,3 +20,5 @@ add_llvm_library(LLVMipo StripDeadPrototypes.cpp StripSymbols.cpp ) + +add_dependencies(LLVMipo intrinsics_gen) diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 2b427aa6a4..18c1c7b000 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -65,7 +65,7 @@ bool GlobalDCE::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Functions 
with external linkage are needed if they have a body - if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + if (!I->isDiscardableIfUnused() && !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) GlobalIsNeeded(I); } @@ -75,7 +75,7 @@ bool GlobalDCE::runOnModule(Module &M) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible & appending globals are needed, if they have an // initializer. - if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + if (!I->isDiscardableIfUnused() && !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) GlobalIsNeeded(I); } @@ -84,7 +84,7 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible aliases are needed. - if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage()) + if (!I->isDiscardableIfUnused()) GlobalIsNeeded(I); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index d316d52678..4e1c23c198 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -254,6 +254,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, GS.StoredType = GlobalStatus::isStored; } } + } else if (isa<BitCastInst>(I)) { + if (AnalyzeGlobal(I, GS, PHIUsers)) return true; } else if (isa<GetElementPtrInst>(I)) { if (AnalyzeGlobal(I, GS, PHIUsers)) return true; } else if (isa<SelectInst>(I)) { @@ -517,7 +519,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), - GV->isThreadLocal(), + GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -550,7 +552,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), - 
GV->isThreadLocal(), + GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -866,7 +868,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, UndefValue::get(GlobalType), GV->getName()+".body", GV, - GV->isThreadLocal()); + GV->getThreadLocalMode()); // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update @@ -899,7 +901,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), - GV->getName()+".init", GV->isThreadLocal()); + GV->getName()+".init", GV->getThreadLocalMode()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. @@ -1321,7 +1323,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), GV, - GV->isThreadLocal()); + GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); unsigned TypeSize = TD->getTypeAllocSize(FieldTy); @@ -1567,8 +1569,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); - CI = dyn_cast<BitCastInst>(Malloc) ? 
- extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc)) + CI = cast<CallInst>(BCI->getOperand(0)); + else + CI = cast<CallInst>(Malloc); } GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); @@ -1645,7 +1649,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", - GV->isThreadLocal()); + GV->getThreadLocalMode()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); @@ -1716,7 +1720,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// possible. If we make a change, return true. bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - if (!GV->hasLocalLinkage()) + if (!GV->isDiscardableIfUnused()) return false; // Do more involved optimizations if the global is internal. @@ -1729,6 +1733,9 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, return true; } + if (!GV->hasLocalLinkage()) + return false; + SmallPtrSet<const PHINode*, 16> PHIUsers; GlobalStatus GS; @@ -2049,7 +2056,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, // Create the new global and insert it next to the existing list. 
GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", - GCL->isThreadLocal()); + GCL->getThreadLocalMode()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); @@ -2705,7 +2712,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, << " stores.\n"); for (DenseMap<Constant*, Constant*>::const_iterator I = Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); - I != E; ++I) + I != E; ++I) CommitValueTo(I->second, I->first); for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I = Eval.getInvariants().begin(), E = Eval.getInvariants().end(); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 0b01c3822f..715a384adc 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -45,22 +45,22 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" +#include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index b5caa9a557..d8e8cf77dd 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -22,11 +22,11 @@ #include 
"llvm/Transforms/IPO.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index d070ccc0d6..72cfe2c985 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,3 +13,5 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) + +add_dependencies(LLVMInstCombine intrinsics_gen) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 199df519ce..c2b0e03b40 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,11 +11,11 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 00f9974125..99b62f8d05 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -544,11 +544,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - // C - zext(bool) -> bool ? 
C - 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy()->isIntegerTy(1)) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); - // C-(X+C2) --> (C-C2)-X ConstantInt *C2; if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3bafc6661b..7d0af0d802 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -995,9 +995,11 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { std::swap(Op0Ordered, Op1Ordered); } if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq + // uno && ueq -> uno && (uno || eq) -> uno // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) + if (!Op0Ordered && (Op0Ordered == Op1Ordered)) + return LHS; + if (Op0Ordered && (Op0Ordered == Op1Ordered)) return RHS; // uno && oeq -> uno && (ord && eq) -> false diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 19776b1e09..f74cff85c6 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -172,8 +172,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); - if (extractMallocCall(&CI) || extractCallocCall(&CI)) - return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -246,84 +244,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { - // We need target data for just about everything so depend on it. - if (!TD) return 0; - - Type *ReturnTy = CI.getType(); - uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 
0 : -1ULL; - - // Get to the real allocated thing and offset as fast as possible. - Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - - uint64_t Offset = 0; - uint64_t Size = -1ULL; - - // Try to look through constant GEPs. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) { - if (!GEP->hasAllConstantIndices()) return 0; - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. - SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - if (!GEP->getPointerOperandType()->isPointerTy()) - return 0; - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); - - Op1 = GEP->getPointerOperand()->stripPointerCasts(); - - // Make sure we're not a constant offset from an external - // global. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) - if (!GV->hasDefinitiveInitializer()) return 0; - } - - // If we've stripped down to a single global variable that we - // can know the size of then just return that. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - Size = TD->getTypeAllocSize(C->getType()); - } else { - // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); - return ReplaceInstUsesWith(CI, RetVal); - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { - // Get alloca size. - if (AI->getAllocatedType()->isSized()) { - Size = TD->getTypeAllocSize(AI->getAllocatedType()); - if (AI->isArrayAllocation()) { - const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); - if (!C) return 0; - Size *= C->getZExtValue(); - } - } - } else if (CallInst *MI = extractMallocCall(Op1)) { - // Get allocation size. - Value *Arg = MI->getArgOperand(0); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) - Size = CI->getZExtValue(); - - } else if (CallInst *MI = extractCallocCall(Op1)) { - // Get allocation size. 
- Value *Arg1 = MI->getArgOperand(0); - Value *Arg2 = MI->getArgOperand(1); - if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Arg1)) - if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Arg2)) - Size = (CI1->getValue() * CI2->getValue()).getZExtValue(); - } - - // Do not return "I don't know" here. Later optimization passes could - // make it possible to evaluate objectsize to a constant. - if (Size == -1ULL) - return 0; - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); - } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); + uint64_t Size; + if (getObjectSize(II->getArgOperand(0), Size, TD)) + return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); + return 0; } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -768,7 +692,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI) || isMalloc(BI)) { + if (isa<AllocaInst>(BI)) { CannotRemove = true; break; } @@ -955,6 +879,9 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { + if (isAllocLikeFn(CS.getInstruction())) + return visitMalloc(*CS.getInstruction()); + bool Changed = false; // If the callee is a pointer to a function, attempt to move any casts to the @@ -990,24 +917,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { } if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - // This instruction is not reachable, just remove it. We insert a store to - // undef so that we know that this code is not reachable, despite the fact - // that we can't modify the CFG here. 
- new StoreInst(ConstantInt::getTrue(Callee->getContext()), - UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), - CS.getInstruction()); - // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!CS.getInstruction()->getType()->isVoidTy()) ReplaceInstUsesWith(*CS.getInstruction(), UndefValue::get(CS.getInstruction()->getType())); - if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { - // Don't break the CFG, insert a dummy cond branch. - BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - ConstantInt::getTrue(Callee->getContext()), II); + if (isa<InvokeInst>(CS.getInstruction())) { + // Can't remove an invoke because we cannot change the CFG. + return 0; } + + // This instruction is not reachable, just remove it. We insert a store to + // undef so that we know that this code is not reachable, despite the fact + // that we can't modify the CFG here. + new StoreInst(ConstantInt::getTrue(Callee->getContext()), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + CS.getInstruction()); + return EraseInstFromFunction(*CS.getInstruction()); } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d07be2c8b3..555b4428d2 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -648,10 +648,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { if (!I) return false; // If the input is a truncate from the destination type, we can trivially - // eliminate it, even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + // eliminate it. 
+ if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -992,11 +990,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - // If this is a truncate from the dest type, we can trivially eliminate it, - // even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + // If this is a truncate from the dest type, we can trivially eliminate it. + if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -1341,10 +1336,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // non-type-safe code. if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) && GEP->hasAllConstantIndices()) { - // We are guaranteed to get a constant from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP)); - int64_t Offset = OffsetV->getSExtValue(); - + SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + // Get the base pointer input of the bitcast, and the type it points to. 
Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); Type *GEPIdxTy = diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b2f2e248e4..b9df5eb81e 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -106,7 +106,6 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -135,16 +134,49 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - + if (TD && AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + + // Move all alloca's of zero byte objects to the entry block and merge them + // together. Note that we only do this for alloca's, because malloc should + // allocate and return a unique pointer, even for a zero byte allocation. + if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { + // For a zero sized alloca there is no point in doing an array allocation. + // This is helpful if the array size is a complicated expression not used + // elsewhere. 
+ if (AI.isArrayAllocation()) { + AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); + return &AI; + } + + // Get the first instruction in the entry block. + BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); + Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); + if (FirstInst != &AI) { + // If the entry block doesn't start with a zero-size alloca then move + // this one to the start of the entry block. There is no problem with + // dominance as the array size was forced to a constant earlier already. + AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); + if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || + TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + AI.moveBefore(FirstInst); + return &AI; + } + + // Replace this zero-sized alloca with the one at the start of the entry + // block after ensuring that the address will be aligned enough for both + // types. + unsigned MaxAlign = + std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()), + TD->getPrefTypeAlignment(AI.getAllocatedType())); + EntryAI->setAlignment(MaxAlign); + if (AI.getType() != EntryAI->getType()) + return new BitCastInst(EntryAI, AI.getType()); + return ReplaceInstUsesWith(AI, EntryAI); + } + } } // Try to aggressively remove allocas which are only used for GEPs, lifetime diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 5168e2a113..35a0bbb761 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -464,9 +464,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; - if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || + match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { if (*CI != 1) N = Builder->CreateAdd(N, 
ConstantInt::get(I.getType(),CI->logBase2())); + if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) + N = Builder->CreateZExt(N, Z->getDestTy()); if (I.isExact()) return BinaryOperator::CreateExactLShr(Op0, N); return BinaryOperator::CreateLShr(Op0, N); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index b8a533bf7c..c5124bf7b2 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1058,10 +1058,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - // Determine how much the GEP moves the pointer. We are guaranteed to get - // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); - int64_t Offset = OffsetV->getSExtValue(); + // Determine how much the GEP moves the pointer. + SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops); // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1069,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isMalloc(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. 
if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1168,6 +1167,14 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { } EraseInstFromFunction(*I); } + + if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) { + // Replace invoke with a NOP intrinsic to maintain the original CFG + Module *M = II->getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), + ArrayRef<Value *>(), "", II->getParent()); + } return EraseInstFromFunction(MI); } return 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index a9d08db9c6..482ebef2a2 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,6 +16,12 @@ #define DEBUG_TYPE "asan" #include "FunctionBlackList.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Type.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" @@ -23,14 +29,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include "llvm/Target/TargetData.h" @@ -38,7 +39,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Type.h" #include <string> #include <algorithm> @@ -82,6 +82,14 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", static 
cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); +// This flags limits the number of instructions to be instrumented +// in any given BB. Normally, this should be set to unlimited (INT_MAX), +// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary +// set it to 10000. +static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb", + cl::init(10000), + cl::desc("maximal number of instructions to instrument in any given BB"), + cl::Hidden); // This flag may need to be replaced with -f[no]asan-stack. static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); @@ -149,7 +157,6 @@ struct AddressSanitizer : public ModulePass { bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); bool insertGlobalRedzones(Module &M); - BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp); static char ID; // Pass identification, replacement for typeid private: @@ -212,29 +219,27 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // Split the basic block and insert an if-then code. // Before: // Head -// SplitBefore +// Cmp // Tail // After: // Head // if (Cmp) -// NewBasicBlock -// SplitBefore +// ThenBlock // Tail // -// Returns the NewBasicBlock's terminator. -BranchInst *AddressSanitizer::splitBlockAndInsertIfThen( - Instruction *SplitBefore, Value *Cmp) { +// Returns the ThenBlock's terminator. 
+static BranchInst *splitBlockAndInsertIfThen(Value *Cmp) { + Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); - BasicBlock *NewBasicBlock = - BasicBlock::Create(*C, "", Head->getParent()); - BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock, - /*ifFalse*/Tail, - Cmp); + LLVMContext &C = Head->getParent()->getParent()->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent()); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock); + BranchInst *CheckTerm = BranchInst::Create(Tail, ThenBlock); return CheckTerm; } @@ -283,8 +288,8 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { IRBuilder<> IRB(InsertBefore); Value *Cmp = IRB.CreateICmpNE(Length, - Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp); + Constant::getNullValue(Length->getType())); + InsertBefore = splitBlockAndInsertIfThen(Cmp); } instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); @@ -381,8 +386,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - Instruction *CheckTerm = splitBlockAndInsertIfThen( - cast<Instruction>(Cmp)->getNextNode(), Cmp); + Instruction *CheckTerm = splitBlockAndInsertIfThen(Cmp); IRBuilder<> IRB2(CheckTerm); size_t Granularity = 1 << MappingScale; @@ -400,7 +404,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue); - CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2); + CheckTerm = splitBlockAndInsertIfThen(Cmp2); } 
IRBuilder<> IRB1(CheckTerm); @@ -511,7 +515,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( M, NewTy, G->isConstant(), G->getLinkage(), - NewInitializer, "", G, G->isThreadLocal()); + NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setAlignment(RedzoneSize); @@ -689,6 +693,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { TempsToInstrument.clear(); + int NumInsnsPerBB = 0; for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (LooksLikeCodeInBug11395(BI)) return false; @@ -710,6 +715,9 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { continue; } ToInstrument.push_back(BI); + NumInsnsPerBB++; + if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) + break; } } diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index e4c8cf105c..eaa3a4000f 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -9,3 +9,5 @@ add_llvm_library(LLVMInstrumentation ProfilingUtils.cpp ThreadSanitizer.cpp ) + +add_dependencies(LLVMInstrumentation intrinsics_gen) diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 6c42137b3d..264a6a6153 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -18,23 +18,23 @@ #include "ProfilingUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" -#include 
"llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/PathV2.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/PathV2.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include <string> #include <utility> using namespace llvm; @@ -448,7 +448,7 @@ bool GCOVProfiler::emitProfileArcs() { new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), - "__llvm_gcov_ctr", 0, false, 0); + "__llvm_gcov_ctr"); CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); UniqueVector<BasicBlock *> ComplexEdgePreds; @@ -687,8 +687,7 @@ void GCOVProfiler::insertCounterWriteout( FTy = FunctionType::get(Type::getInt32Ty(*Ctx), PointerType::get(FTy, 0), false); - Function *AtExitFn = - Function::Create(FTy, GlobalValue::ExternalLinkage, "atexit", M); + Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 31af145e79..4c12a9b624 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,26 +22,26 @@ #define DEBUG_TYPE "tsan" #include "FunctionBlackList.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Type.h" #include "llvm/ADT/SmallSet.h" 
#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Intrinsics.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Type.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/BoundsChecking.cpp b/lib/Transforms/Scalar/BoundsChecking.cpp index d10d97ca05..0690d76e7b 100644 --- a/lib/Transforms/Scalar/BoundsChecking.cpp +++ b/lib/Transforms/Scalar/BoundsChecking.cpp @@ -14,55 +14,29 @@ #define DEBUG_TYPE "bounds-checking" #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetFolder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Metadata.h" -#include "llvm/Operator.h" -#include "llvm/Pass.h" using namespace llvm; -static cl::opt<bool> ManyTrapBB("bounds-checking-multiple-traps", - 
cl::desc("Use one trap block per assertion")); +static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap", + cl::desc("Use one trap block per function")); STATISTIC(ChecksAdded, "Bounds checks added"); STATISTIC(ChecksSkipped, "Bounds checks skipped"); STATISTIC(ChecksUnable, "Bounds checks unable to add"); -STATISTIC(ChecksUnableInterproc, "Bounds checks unable to add (interprocedural)"); -STATISTIC(ChecksUnableLoad, "Bounds checks unable to add (LoadInst)"); typedef IRBuilder<true, TargetFolder> BuilderTy; namespace { - // FIXME: can use unions here to save space - struct CacheData { - APInt Offset; - Value *OffsetValue; - APInt Size; - Value *SizeValue; - bool ReturnVal; - CacheData() {} - CacheData(APInt Off, Value *OffVal, APInt Sz, Value *SzVal, bool Ret) : - Offset(Off), OffsetValue(OffVal), Size(Sz), SizeValue(SzVal), - ReturnVal(Ret) {} - }; - typedef DenseMap<Value*, CacheData> CacheMapTy; - typedef SmallPtrSet<Value*, 8> PtrSetTy; - struct BoundsChecking : public FunctionPass { static char ID; @@ -74,20 +48,15 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetData>(); - AU.addRequired<LoopInfo>(); - AU.addRequired<ScalarEvolution>(); } private: const TargetData *TD; - LoopInfo *LI; - ScalarEvolution *SE; + ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; - Function *Fn; + Instruction *Inst; BasicBlock *TrapBB; unsigned Penalty; - CacheMapTy CacheMap; - PtrSetTy SeenPtrs; BasicBlock *getTrapBB(); void emitBranchToTrap(Value *Cmp = 0); @@ -108,9 +77,10 @@ INITIALIZE_PASS_END(BoundsChecking, "bounds-checking", /// getTrapBB - create a basic block that traps. All overflowing conditions /// branch to this block. There's only one trap block per function. 
BasicBlock *BoundsChecking::getTrapBB() { - if (TrapBB && !ManyTrapBB) + if (TrapBB && SingleTrapBB) return TrapBB; + Function *Fn = Inst->getParent()->getParent(); BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint(); TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); Builder->SetInsertPoint(TrapBB); @@ -119,6 +89,7 @@ BasicBlock *BoundsChecking::getTrapBB() { CallInst *TrapCall = Builder->CreateCall(F); TrapCall->setDoesNotReturn(); TrapCall->setDoesNotThrow(); + TrapCall->setDebugLoc(Inst->getDebugLoc()); Builder->CreateUnreachable(); Builder->SetInsertPoint(PrevInsertPoint); @@ -129,6 +100,16 @@ BasicBlock *BoundsChecking::getTrapBB() { /// emitBranchToTrap - emit a branch instruction to a trap block. /// If Cmp is non-null, perform a jump only if its value evaluates to true. void BoundsChecking::emitBranchToTrap(Value *Cmp) { + // check if the comparison is always false + ConstantInt *C = dyn_cast_or_null<ConstantInt>(Cmp); + if (C) { + ++ChecksSkipped; + if (!C->getZExtValue()) + return; + else + Cmp = 0; // unconditional branch + } + Instruction *Inst = Builder->GetInsertPoint(); BasicBlock *OldBB = Inst->getParent(); BasicBlock *Cont = OldBB->splitBasicBlock(Inst); @@ -141,310 +122,6 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) { } -#define GET_VALUE(Val, Int) \ - if (!Val) \ - Val = ConstantInt::get(IntTy, Int) - -#define RETURN(Val) \ - do { ReturnVal = Val; goto cache_and_return; } while (0) - -/// computeAllocSize - compute the object size and the offset within the object -/// pointed by Ptr. OffsetValue/SizeValue will be null if they are constant, and -/// therefore the result is given in Offset/Size variables instead. -/// Returns true if the offset and size could be computed within the given -/// maximum run-time penalty. 
-bool BoundsChecking::computeAllocSize(Value *Ptr, APInt &Offset, - Value* &OffsetValue, APInt &Size, - Value* &SizeValue) { - Ptr = Ptr->stripPointerCasts(); - - // lookup to see if we've seen the Ptr before - CacheMapTy::iterator CacheIt = CacheMap.find(Ptr); - if (CacheIt != CacheMap.end()) { - CacheData &Cache = CacheIt->second; - Offset = Cache.Offset; - OffsetValue = Cache.OffsetValue; - Size = Cache.Size; - SizeValue = Cache.SizeValue; - return Cache.ReturnVal; - } - - // record the pointers that were handled in this run, so that they can be - // cleaned later if something fails - SeenPtrs.insert(Ptr); - - IntegerType *IntTy = TD->getIntPtrType(Fn->getContext()); - unsigned IntTyBits = IntTy->getBitWidth(); - bool ReturnVal; - - // always generate code immediately before the instruction being processed, so - // that the generated code dominates the same BBs - Instruction *PrevInsertPoint = Builder->GetInsertPoint(); - if (Instruction *I = dyn_cast<Instruction>(Ptr)) - Builder->SetInsertPoint(I); - - // initalize with "don't know" state: offset=0 and size=uintmax - Offset = 0; - Size = APInt::getMaxValue(TD->getTypeSizeInBits(IntTy)); - OffsetValue = SizeValue = 0; - - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - APInt PtrOffset(IntTyBits, 0); - Value *PtrOffsetValue = 0; - if (!computeAllocSize(GEP->getPointerOperand(), PtrOffset, PtrOffsetValue, - Size, SizeValue)) - RETURN(false); - - if (GEP->hasAllConstantIndices()) { - SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); - // if PtrOffset is constant, return immediately - if (!PtrOffsetValue) { - Offset += PtrOffset; - RETURN(true); - } - OffsetValue = ConstantInt::get(IntTy, Offset); - } else if (Penalty > 1) { - OffsetValue = EmitGEPOffset(Builder, *TD, GEP); - GET_VALUE(PtrOffsetValue, PtrOffset); - } else - RETURN(false); - - OffsetValue = Builder->CreateAdd(PtrOffsetValue, OffsetValue); - RETURN(true); - - // 
global variable with definitive size - } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - Size = TD->getTypeAllocSize(C->getType()); - RETURN(true); - } - RETURN(false); - - // stack allocation - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Ptr)) { - if (!AI->getAllocatedType()->isSized()) - RETURN(false); - - Size = TD->getTypeAllocSize(AI->getAllocatedType()); - if (!AI->isArrayAllocation()) - RETURN(true); // we are done - - Value *ArraySize = AI->getArraySize(); - if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { - Size *= C->getValue(); - RETURN(true); - } - - if (Penalty < 2) - RETURN(false); - - // VLA: compute size dynamically - SizeValue = ConstantInt::get(ArraySize->getType(), Size); - SizeValue = Builder->CreateMul(SizeValue, ArraySize); - RETURN(true); - - // function arguments - } else if (Argument *A = dyn_cast<Argument>(Ptr)) { - // right now we only support byval arguments, so that no interprocedural - // analysis is necessary - if (!A->hasByValAttr()) { - ++ChecksUnableInterproc; - RETURN(false); - } - - PointerType *PT = cast<PointerType>(A->getType()); - Size = TD->getTypeAllocSize(PT->getElementType()); - RETURN(true); - - // ptr = select(ptr1, ptr2) - } else if (SelectInst *SI = dyn_cast<SelectInst>(Ptr)) { - APInt OffsetTrue(IntTyBits, 0), OffsetFalse(IntTyBits, 0); - APInt SizeTrue(IntTyBits, 0), SizeFalse(IntTyBits, 0); - Value *OffsetValueTrue = 0, *OffsetValueFalse = 0; - Value *SizeValueTrue = 0, *SizeValueFalse = 0; - - bool TrueAlloc = computeAllocSize(SI->getTrueValue(), OffsetTrue, - OffsetValueTrue, SizeTrue, SizeValueTrue); - bool FalseAlloc = computeAllocSize(SI->getFalseValue(), OffsetFalse, - OffsetValueFalse, SizeFalse, - SizeValueFalse); - if (!TrueAlloc || !FalseAlloc) - RETURN(false); - - // fold constant sizes & offsets if they are equal - if (!OffsetValueTrue && !OffsetValueFalse && OffsetTrue == OffsetFalse) - 
Offset = OffsetTrue; - else if (Penalty > 1) { - GET_VALUE(OffsetValueTrue, OffsetTrue); - GET_VALUE(OffsetValueFalse, OffsetFalse); - OffsetValue = Builder->CreateSelect(SI->getCondition(), OffsetValueTrue, - OffsetValueFalse); - } else - RETURN(false); - - if (!SizeValueTrue && !SizeValueFalse && SizeTrue == SizeFalse) - Size = SizeTrue; - else if (Penalty > 1) { - GET_VALUE(SizeValueTrue, SizeTrue); - GET_VALUE(SizeValueFalse, SizeFalse); - SizeValue = Builder->CreateSelect(SI->getCondition(), SizeValueTrue, - SizeValueFalse); - } else - RETURN(false); - RETURN(true); - - // call allocation function - } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) { - SmallVector<unsigned, 4> Args; - - if (MDNode *MD = CI->getMetadata("alloc_size")) { - for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) - Args.push_back(cast<ConstantInt>(MD->getOperand(i))->getZExtValue()); - - } else if (Function *Callee = CI->getCalledFunction()) { - FunctionType *FTy = Callee->getFunctionType(); - - // alloc(size) - if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy()) { - if ((Callee->getName() == "malloc" || - Callee->getName() == "valloc" || - Callee->getName() == "_Znwj" || // operator new(unsigned int) - Callee->getName() == "_Znwm" || // operator new(unsigned long) - Callee->getName() == "_Znaj" || // operator new[](unsigned int) - Callee->getName() == "_Znam")) { - Args.push_back(0); - } - } else if (FTy->getNumParams() == 2) { - // alloc(_, x) - if (FTy->getParamType(1)->isIntegerTy() && - ((Callee->getName() == "realloc" || - Callee->getName() == "reallocf"))) { - Args.push_back(1); - - // alloc(x, y) - } else if (FTy->getParamType(0)->isIntegerTy() && - FTy->getParamType(1)->isIntegerTy() && - Callee->getName() == "calloc") { - Args.push_back(0); - Args.push_back(1); - } - } else if (FTy->getNumParams() == 3) { - // alloc(_, _, x) - if (FTy->getParamType(2)->isIntegerTy() && - Callee->getName() == "posix_memalign") { - Args.push_back(2); - } - } - } - - 
if (Args.empty()) - RETURN(false); - - // check if all arguments are constant. if so, the object size is also const - bool AllConst = true; - for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - if (!isa<ConstantInt>(CI->getArgOperand(*I))) { - AllConst = false; - break; - } - } - - if (AllConst) { - Size = 1; - for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - ConstantInt *Arg = cast<ConstantInt>(CI->getArgOperand(*I)); - Size *= Arg->getValue().zextOrSelf(IntTyBits); - } - RETURN(true); - } - - if (Penalty < 2) - RETURN(false); - - // not all arguments are constant, so create a sequence of multiplications - for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - Value *Arg = Builder->CreateZExt(CI->getArgOperand(*I), IntTy); - if (!SizeValue) { - SizeValue = Arg; - continue; - } - SizeValue = Builder->CreateMul(SizeValue, Arg); - } - RETURN(true); - - // TODO: handle more standard functions (+ wchar cousins): - // - strdup / strndup - // - strcpy / strncpy - // - strcat / strncat - // - memcpy / memmove - // - strcat / strncat - // - memset - - } else if (PHINode *PHI = dyn_cast<PHINode>(Ptr)) { - // create 2 PHIs: one for offset and another for size - PHINode *OffsetPHI = Builder->CreatePHI(IntTy, PHI->getNumIncomingValues()); - PHINode *SizePHI = Builder->CreatePHI(IntTy, PHI->getNumIncomingValues()); - - // insert right away in the cache to handle recursive PHIs - CacheMap[Ptr] = CacheData(APInt(), OffsetPHI, APInt(), SizePHI, true); - - // compute offset/size for each PHI incoming pointer - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - Builder->SetInsertPoint(PHI->getIncomingBlock(i)->getFirstInsertionPt()); - - APInt PhiOffset(IntTyBits, 0), PhiSize(IntTyBits, 0); - Value *PhiOffsetValue = 0, *PhiSizeValue = 0; - - if (!computeAllocSize(PHI->getIncomingValue(i), PhiOffset, PhiOffsetValue, - PhiSize, 
PhiSizeValue)) { - OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); - OffsetPHI->eraseFromParent(); - SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); - SizePHI->eraseFromParent(); - RETURN(false); - } - - GET_VALUE(PhiOffsetValue, PhiOffset); - GET_VALUE(PhiSizeValue, PhiSize); - - OffsetPHI->addIncoming(PhiOffsetValue, PHI->getIncomingBlock(i)); - SizePHI->addIncoming(PhiSizeValue, PHI->getIncomingBlock(i)); - } - - OffsetValue = OffsetPHI; - SizeValue = SizePHI; - RETURN(true); - - } else if (isa<UndefValue>(Ptr) || isa<ConstantPointerNull>(Ptr)) { - Size = 0; - RETURN(true); - - } else if (isa<LoadInst>(Ptr)) { - ++ChecksUnableLoad; - RETURN(false); - } - - DEBUG(dbgs() << "computeAllocSize unhandled value:\n" << *Ptr << "\n"); - RETURN(false); - -cache_and_return: - // cache the result and return - CacheMap[Ptr] = CacheData(Offset, OffsetValue, Size, SizeValue, ReturnVal); - - // non-computable results can be safely cached - if (!ReturnVal) - SeenPtrs.erase(Ptr); - - Builder->SetInsertPoint(PrevInsertPoint); - return ReturnVal; -} - - /// instrument - adds run-time bounds checks to memory accessing instructions. /// Ptr is the pointer that will be read/written, and InstVal is either the /// result from the load or the value being stored. It is used to determine the @@ -455,67 +132,29 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); - IntegerType *IntTy = TD->getIntPtrType(Fn->getContext()); - unsigned IntTyBits = IntTy->getBitWidth(); - - APInt Offset(IntTyBits, 0), Size(IntTyBits, 0); - Value *OffsetValue = 0, *SizeValue = 0; - - if (!computeAllocSize(Ptr, Offset, OffsetValue, Size, SizeValue)) { - DEBUG(dbgs() << "computeAllocSize failed:\n" << *Ptr << "\n"); - - // erase everything that was computed in this iteration from the cache, so - // that no dangling references are left behind. We could be a bit smarter if - // we kept a dependency graph. 
It's probably not worth the complexity, - // though. - for (PtrSetTy::iterator I=SeenPtrs.begin(), E=SeenPtrs.end(); I != E; ++I) - CacheMap.erase(*I); - SeenPtrs.clear(); + SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr); + if (!ObjSizeEval->bothKnown(SizeOffset)) { ++ChecksUnable; return false; } + Value *Size = SizeOffset.first; + Value *Offset = SizeOffset.second; + + IntegerType *IntTy = TD->getIntPtrType(Inst->getContext()); + Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + // three checks are required to ensure safety: // . Offset >= 0 (since the offset is given from the base ptr) // . Size >= Offset (unsigned) // . Size - Offset >= NeededSize (unsigned) - if (!OffsetValue && !SizeValue) { - if (Offset.slt(0) || Size.ult(Offset) || (Size - Offset).ult(NeededSize)) { - // Out of bounds - emitBranchToTrap(); - ++ChecksAdded; - return true; - } - // in bounds - ++ChecksSkipped; - return false; - } - - // emit check for offset < 0 - Value *CmpOffset = 0; - if (OffsetValue) - CmpOffset = Builder->CreateICmpSLT(OffsetValue, ConstantInt::get(IntTy, 0)); - else if (Offset.slt(0)) { - // offset proved to be negative - emitBranchToTrap(); - ++ChecksAdded; - return true; - } - - // we couldn't determine statically if the memory access is safe; emit a - // run-time check - GET_VALUE(OffsetValue, Offset); - GET_VALUE(SizeValue, Size); - - Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // FIXME: add NSW/NUW here? 
-- we dont care if the subtraction overflows - Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue); - Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue); - Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); - Value *Or = Builder->CreateOr(Cmp1, Cmp2); - if (CmpOffset) - Or = Builder->CreateOr(CmpOffset, Or); + Value *ObjSize = Builder->CreateSub(Size, Offset); + Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Value *Cmp2 = Builder->CreateICmpULT(Size, Offset); + Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); + Value *Or = Builder->CreateOr(Cmp1, Builder->CreateOr(Cmp2, Cmp3)); emitBranchToTrap(Or); ++ChecksAdded; @@ -524,13 +163,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { bool BoundsChecking::runOnFunction(Function &F) { TD = &getAnalysis<TargetData>(); - LI = &getAnalysis<LoopInfo>(); - SE = &getAnalysis<ScalarEvolution>(); TrapBB = 0; - Fn = &F; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory // touching instructions @@ -545,16 +183,16 @@ bool BoundsChecking::runOnFunction(Function &F) { bool MadeChange = false; for (std::vector<Instruction*>::iterator i = WorkList.begin(), e = WorkList.end(); i != e; ++i) { - Instruction *I = *i; + Inst = *i; - Builder->SetInsertPoint(I); - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + Builder->SetInsertPoint(Inst); + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { MadeChange |= instrument(LI->getPointerOperand(), LI); - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); - } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(I)) { + } else if (AtomicCmpXchgInst *AI = 
dyn_cast<AtomicCmpXchgInst>(Inst)) { MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); - } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I)) { + } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) { MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); } else { llvm_unreachable("unknown Instruction type"); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 635c34486e..bf9cc66392 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -33,3 +33,5 @@ add_llvm_library(LLVMScalarOpts Sink.cpp TailRecursionElimination.cpp ) + +add_dependencies(LLVMScalarOpts intrinsics_gen) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 24d64b50c2..cbc089ab78 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,32 +18,32 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include 
"llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -1133,7 +1133,8 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { // If we have a SelectInst that will likely profit from branch prediction, // turn it into a branch. - if (DisableSelectToBranch || OptSize || !TLI->isPredictableSelectExpensive()) + if (DisableSelectToBranch || OptSize || !TLI || + !TLI->isPredictableSelectExpensive()) return false; if (!SI->getCondition()->getType()->isIntegerTy(1) || diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f498cc7934..c8448fa6c1 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -32,7 +32,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -71,7 +71,7 @@ namespace { bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallPtrSet<Value*, 16> &DeadStackObjects); + SmallSetVector<Value*, 16> &DeadStackObjects); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); 
@@ -106,7 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, - SmallPtrSet<Value*, 16> *ValueSet = 0) { + SmallSetVector<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -136,7 +136,7 @@ static void DeleteDeadInstruction(Instruction *I, DeadInst->eraseFromParent(); - if (ValueSet) ValueSet->erase(DeadInst); + if (ValueSet) ValueSet->remove(DeadInst); } while (!NowDeadInsts.empty()); } @@ -275,39 +275,9 @@ static Value *getStoredPointerOperand(Instruction *I) { } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { - const TargetData *TD = AA.getTargetData(); - - if (const CallInst *CI = extractMallocCall(V)) { - if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) - return C->getZExtValue(); - } - - if (const CallInst *CI = extractCallocCall(V)) { - if (const ConstantInt *C1 = dyn_cast<ConstantInt>(CI->getArgOperand(0))) - if (const ConstantInt *C2 = dyn_cast<ConstantInt>(CI->getArgOperand(1))) - return (C1->getValue() * C2->getValue()).getZExtValue(); - } - - if (TD == 0) - return AliasAnalysis::UnknownSize; - - if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) { - // Get size information for the alloca - if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) - return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); - } - - if (const Argument *A = dyn_cast<Argument>(V)) { - if (A->hasByValAttr()) - if (PointerType *PT = dyn_cast<PointerType>(A->getType())) - return TD->getTypeAllocSize(PT->getElementType()); - } - - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->mayBeOverridden()) - return TD->getTypeAllocSize(GV->getType()->getElementType()); - } - + uint64_t Size; + if (getObjectSize(V, Size, AA.getTargetData())) + return Size; return AliasAnalysis::UnknownSize; } @@ -700,21 +670,18 @@ bool 
DSE::handleEndBlock(BasicBlock &BB) { // Keep track of all of the stack objects that are dead at the end of the // function. - SmallPtrSet<Value*, 16> DeadStackObjects; + SmallSetVector<Value*, 16> DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - DeadStackObjects.insert(AI); + if (isa<AllocaInst>(I)) + DeadStackObjects.insert(I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - CallInst *CI = extractMallocCall(I); - if (!CI) - CI = extractCallocCall(I); - if (CI && !PointerMayBeCaptured(CI, true, true)) - DeadStackObjects.insert(CI); + else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + DeadStackObjects.insert(I); } // Treat byval arguments the same, stores to them are dead at the end of the @@ -773,18 +740,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (AllocaInst *A = dyn_cast<AllocaInst>(BBI)) { - DeadStackObjects.erase(A); - continue; - } - - if (CallInst *CI = extractMallocCall(BBI)) { - DeadStackObjects.erase(CI); - continue; - } - - if (CallInst *CI = extractCallocCall(BBI)) { - DeadStackObjects.erase(CI); + if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) { + DeadStackObjects.remove(BBI); continue; } @@ -797,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; - for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the call site touches it. 
AliasAnalysis::ModRefResult A = @@ -809,7 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(), E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.erase(*I); + DeadStackObjects.remove(*I); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. @@ -856,7 +813,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// of the stack objects in the DeadStackObjects set. If so, they become live /// because the location is being loaded. void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallPtrSet<Value*, 16> &DeadStackObjects) { + SmallSetVector<Value*, 16> &DeadStackObjects) { const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); // A constant can't be in the dead pointer set. @@ -866,12 +823,12 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // If the kill pointer can be easily reduced to an alloca, don't bother doing // extraneous AA queries. if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) { - DeadStackObjects.erase(const_cast<Value*>(UnderlyingPointer)); + DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer)); return; } SmallVector<Value*, 16> NowLive; - for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), + for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the loaded location could alias the stack location. 
AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA)); @@ -881,5 +838,5 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end(); I != E; ++I) - DeadStackObjects.erase(*I); + DeadStackObjects.remove(*I); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c247ea9360..476ec383e6 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -18,9 +18,15 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" @@ -31,21 +37,14 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; using namespace PatternMatch; @@ 
-1437,7 +1436,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1736,155 +1735,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B) { - if (!A || !B) - return NULL; - - if (A == B) - return A; - - SmallVector<MDNode *, 4> PathA; - MDNode *T = A; - while (T) { - PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - SmallVector<MDNode *, 4> PathB; - T = B; - while (T) { - PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; - } - - int IA = PathA.size() - 1; - int IB = PathB.size() - 1; - - MDNode *Ret = 0; - while (IA >= 0 && IB >=0) { - if (PathA[IA] == PathB[IB]) - Ret = PathA[IA]; - else - break; - --IA; - --IB; - } - return Ret; -} - -static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B) { - if (!A || !B) - return NULL; - - APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF(); - APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF(); - if (AVal.compare(BVal) == APFloat::cmpLessThan) - return A; - return B; -} - -static bool isContiguous(const ConstantRange &A, const ConstantRange &B) { - return A.getUpper() == B.getLower() || A.getLower() == B.getUpper(); -} - -static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) { - return !A.intersectWith(B).isEmptySet() || isContiguous(A, B); -} - -static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low, - ConstantInt *High) { - ConstantRange NewRange(Low->getValue(), High->getValue()); - unsigned Size = EndPoints.size(); - APInt LB = 
cast<ConstantInt>(EndPoints[Size - 2])->getValue(); - APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue(); - ConstantRange LastRange(LB, LE); - if (canBeMerged(NewRange, LastRange)) { - ConstantRange Union = LastRange.unionWith(NewRange); - Type *Ty = High->getType(); - EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower()); - EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper()); - return true; - } - return false; -} - -static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low, - ConstantInt *High) { - if (!EndPoints.empty()) - if (tryMergeRange(EndPoints, Low, High)) - return; - - EndPoints.push_back(Low); - EndPoints.push_back(High); -} - -static MDNode *getMostGenericRange(MDNode *A, MDNode *B) { - // Given two ranges, we want to compute the union of the ranges. This - // is slightly complitade by having to combine the intervals and merge - // the ones that overlap. - - if (!A || !B) - return NULL; - - if (A == B) - return A; - - // First, walk both lists in older of the lower boundary of each interval. - // At each step, try to merge the new interval to the last one we adedd. 
- SmallVector<Value*, 4> EndPoints; - int AI = 0; - int BI = 0; - int AN = A->getNumOperands() / 2; - int BN = B->getNumOperands() / 2; - while (AI < AN && BI < BN) { - ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI)); - ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI)); - - if (ALow->getValue().slt(BLow->getValue())) { - addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1))); - ++AI; - } else { - addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1))); - ++BI; - } - } - while (AI < AN) { - addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)), - cast<ConstantInt>(A->getOperand(2 * AI + 1))); - ++AI; - } - while (BI < BN) { - addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)), - cast<ConstantInt>(B->getOperand(2 * BI + 1))); - ++BI; - } - - // If we have more than 2 ranges (4 endpoints) we have to try to merge - // the last and first ones. - unsigned Size = EndPoints.size(); - if (Size > 4) { - ConstantInt *FB = cast<ConstantInt>(EndPoints[0]); - ConstantInt *FE = cast<ConstantInt>(EndPoints[1]); - if (tryMergeRange(EndPoints, FB, FE)) { - for (unsigned i = 0; i < Size - 2; ++i) { - EndPoints[i] = EndPoints[i + 2]; - } - EndPoints.resize(Size - 2); - } - } - - // If in the end we have a single range, it is possible that it is now the - // full range. Just drop the metadata in that case. - if (EndPoints.size() == 2) { - ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(), - cast<ConstantInt>(EndPoints[1])->getValue()); - if (Range.isFullSet()) - return NULL; - } - - return MDNode::get(A->getContext(), EndPoints); -} - static void patchReplacementInstruction(Value *Repl, Instruction *I) { // Patch the replacement so that it is not more restrictive than the value // being replaced. 
@@ -1911,16 +1761,16 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { case LLVMContext::MD_dbg: llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); case LLVMContext::MD_tbaa: - ReplInst->setMetadata(Kind, getMostGenericTBAA(IMD, ReplMD)); + ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD)); break; case LLVMContext::MD_range: - ReplInst->setMetadata(Kind, getMostGenericRange(IMD, ReplMD)); + ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD)); break; case LLVMContext::MD_prof: llvm_unreachable("MD_prof in a non terminator instruction"); break; case LLVMContext::MD_fpmath: - ReplInst->setMetadata(Kind, getMostGenericFPMath(IMD, ReplMD)); + ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD)); break; } } @@ -2101,7 +1951,7 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. 
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ad15cbb9b4..5fe9462159 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -43,20 +43,20 @@ #define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 94c229a8e2..4ba969e675 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1308,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; case LSRUse::Special: - // Only handle -1 scales, or no scale. - return AM.Scale == 0 || AM.Scale == -1; + // Special case Basic to handle -1 scales. 
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0; } llvm_unreachable("Invalid LSRUse Kind!"); @@ -4268,13 +4268,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } - // Flush the operand list to suppress SCEVExpander hoisting. - if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); - Ops.clear(); - Ops.push_back(SE.getUnknown(FullV)); - } - // Expand the ScaledReg portion. Value *ICmpScaledV = 0; if (F.AM.Scale != 0) { @@ -4296,23 +4289,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. + + // Flush the operand list to suppress SCEVExpander hoisting address modes. + if (!Ops.empty() && LU.Kind == LSRUse::Address) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.AM.Scale)); Ops.push_back(ScaledS); - - // Flush the operand list to suppress SCEVExpander hoisting. - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); - Ops.clear(); - Ops.push_back(SE.getUnknown(FullV)); } } // Expand the GV portion. if (F.AM.BaseGV) { + // Flush the operand list to suppress SCEVExpander hoisting. + if (!Ops.empty()) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + } - // Flush the operand list to suppress SCEVExpander hoisting. + // Flush the operand list to suppress SCEVExpander hoisting of both folded and + // unfolded offsets. LSR assumes they both live next to their uses. 
+ if (!Ops.empty()) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 689bbe9b03..221911866c 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -15,9 +15,9 @@ #define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 4341577f4d..052cc3dac0 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -15,21 +15,21 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" #include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <list> using namespace llvm; diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index d89996a1ff..87e17c7f90 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -4064,8 
+4064,22 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (!RetainRVMarker) break; BasicBlock::iterator BBI = Inst; - --BBI; - while (isNoopInstruction(BBI)) --BBI; + BasicBlock *InstParent = Inst->getParent(); + + // Step up to see if the call immediately precedes the RetainRV call. + // If it's an invoke, we have to cross a block boundary. And we have + // to carefully dodge no-op instructions. + do { + if (&*BBI == InstParent->begin()) { + BasicBlock *Pred = InstParent->getSinglePredecessor(); + if (!Pred) + goto decline_rv_optimization; + BBI = Pred->getTerminator(); + break; + } + --BBI; + } while (isNoopInstruction(BBI)); + if (&*BBI == GetObjCArg(Inst)) { Changed = true; InlineAsm *IA = @@ -4075,6 +4089,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { /*Constraints=*/"", /*hasSideEffects=*/true); CallInst::Create(IA, "", Inst); } + decline_rv_optimization: break; } case IC_InitWeak: { diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 66fa0744b8..bcf34b5256 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -26,20 +26,20 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" #include <algorithm> using namespace llvm; @@ -132,7 +132,7 @@ namespace { private: void 
BuildRankMap(Function &F); unsigned getRank(Value *V); - Value *ReassociateExpression(BinaryOperator *I); + void ReassociateExpression(BinaryOperator *I); void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); @@ -667,23 +667,13 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, /// the new expression into. SmallVector<BinaryOperator*, 8> NodesToRewrite; unsigned Opcode = I->getOpcode(); - NodesToRewrite.push_back(I); + BinaryOperator *Op = I; // ExpressionChanged - Non-null if the rewritten expression differs from the // original in some non-trivial way, requiring the clearing of optional flags. // Flags are cleared from the operator in ExpressionChanged up to I inclusive. BinaryOperator *ExpressionChanged = 0; - BinaryOperator *Previous; - BinaryOperator *Op = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - assert(!NodesToRewrite.empty() && - "Optimized expressions has more nodes than original!"); - Previous = Op; Op = NodesToRewrite.pop_back_val(); - if (ExpressionChanged) - // Compactify the tree instructions together with each other to guarantee - // that the expression tree is dominated by all of Ops. - Op->moveBefore(Previous); - + for (unsigned i = 0; ; ++i) { // The last operation (which comes earliest in the IR) is special as both // operands will come from Ops, rather than just one with the other being // a subexpression. @@ -754,32 +744,47 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // from the original expression then just rewrite the rest of the expression // into it. if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) { - NodesToRewrite.push_back(BO); + Op = BO; continue; } // Otherwise, grab a spare node from the original expression and use that as - // the left-hand side. - assert(!NodesToRewrite.empty() && - "Optimized expressions has more nodes than original!"); + // the left-hand side. 
If there are no nodes left then the optimizers made + // an expression with more nodes than the original! This usually means that + // they did something stupid but it might mean that the problem was just too + // hard (finding the mimimal number of multiplications needed to realize a + // multiplication expression is NP-complete). Whatever the reason, smart or + // stupid, create a new node if there are none left. + BinaryOperator *NewOp; + if (NodesToRewrite.empty()) { + Constant *Undef = UndefValue::get(I->getType()); + NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), + Undef, Undef, "", I); + } else { + NewOp = NodesToRewrite.pop_back_val(); + } + DEBUG(dbgs() << "RA: " << *Op << '\n'); - Op->setOperand(0, NodesToRewrite.back()); + Op->setOperand(0, NewOp); DEBUG(dbgs() << "TO: " << *Op << '\n'); ExpressionChanged = Op; MadeChange = true; ++NumChanged; + Op = NewOp; } // If the expression changed non-trivially then clear out all subclass data - // starting from the operator specified in ExpressionChanged. - if (ExpressionChanged) { + // starting from the operator specified in ExpressionChanged, and compactify + // the operators to just before the expression root to guarantee that the + // expression tree is dominated by all of Ops. + if (ExpressionChanged) do { ExpressionChanged->clearSubclassOptionalData(); if (ExpressionChanged == I) break; + ExpressionChanged->moveBefore(I); ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->use_begin()); } while (1); - } // Throw away any left over nodes from the original expression. for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i) @@ -1478,14 +1483,17 @@ void Reassociate::EraseInst(Instruction *I) { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); // Erase the dead instruction. ValueRankMap.erase(I); + RedoInsts.remove(I); I->eraseFromParent(); // Optimize its operands. + SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes. 
for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) { // If this is a node in an expression tree, climb to the expression root // and add that since that's where optimization actually happens. unsigned Opcode = Op->getOpcode(); - while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode) + while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode && + Visited.insert(Op)) Op = Op->use_back(); RedoInsts.insert(Op); } @@ -1585,7 +1593,7 @@ void Reassociate::OptimizeInst(Instruction *I) { ReassociateExpression(BO); } -Value *Reassociate::ReassociateExpression(BinaryOperator *I) { +void Reassociate::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. @@ -1612,6 +1620,9 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // OptimizeExpression - Now that we have the expression tree in a convenient // sorted form, optimize it globally if possible. if (Value *V = OptimizeExpression(I, Ops)) { + if (V == I) + // Self-referential expression in unreachable code. + return; // This expression tree simplified to something that isn't a tree, // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); @@ -1620,7 +1631,7 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { VI->setDebugLoc(I->getDebugLoc()); RedoInsts.insert(I); ++NumAnnihil; - return V; + return; } // We want to sink immediates as deeply as possible except in the case where @@ -1638,19 +1649,22 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); if (Ops.size() == 1) { + if (Ops[0].Op == I) + // Self-referential expression in unreachable code. + return; + // This expression tree simplified to something that isn't a tree, // eliminate it. 
I->replaceAllUsesWith(Ops[0].Op); if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) OI->setDebugLoc(I->getDebugLoc()); RedoInsts.insert(I); - return Ops[0].Op; + return; } // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. RewriteExprTree(I, Ops); - return I; } bool Reassociate::runOnFunction(Function &F) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 113397fc11..e3e3c9eb17 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -22,34 +22,34 @@ #define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetData.h" +#include 
"llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); @@ -60,12 +60,25 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { - SROA(int T, bool hasDT, char &ID) + SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT) : FunctionPass(ID), HasDomTree(hasDT) { if (T == -1) SRThreshold = 128; else SRThreshold = T; + if (ST == -1) + StructMemberThreshold = 32; + else + StructMemberThreshold = ST; + if (AT == -1) + ArrayElementThreshold = 8; + else + ArrayElementThreshold = AT; + if (SLT == -1) + // Do not limit the scalar integer load size if no threshold is given. + ScalarLoadThreshold = -1; + else + ScalarLoadThreshold = SLT; } bool runOnFunction(Function &F); @@ -87,7 +100,7 @@ namespace { struct AllocaInfo { /// The alloca to promote. AllocaInst *AI; - + /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite /// looping and avoid redundant work. SmallPtrSet<PHINode*, 8> CheckedPHIs; @@ -116,8 +129,21 @@ namespace { hasSubelementAccess(false), hasALoadOrStore(false) {} }; + /// SRThreshold - The maximum alloca size to considered for SROA. unsigned SRThreshold; + /// StructMemberThreshold - The maximum number of members a struct can + /// contain to be considered for SROA. + unsigned StructMemberThreshold; + + /// ArrayElementThreshold - The maximum number of elements an array can + /// have to be considered for SROA. 
+ unsigned ArrayElementThreshold; + + /// ScalarLoadThreshold - The maximum size in bits of scalars to load when + /// converting to scalar + unsigned ScalarLoadThreshold; + void MarkUnsafe(AllocaInfo &I, Instruction *User) { I.isUnsafe = true; DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); @@ -156,6 +182,7 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); + bool ShouldAttemptScalarRepl(AllocaInst *AI); static MemTransferInst *isOnlyCopiedFromConstantGlobal( AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); @@ -165,7 +192,8 @@ namespace { struct SROA_DT : public SROA { static char ID; public: - SROA_DT(int T = -1) : SROA(T, true, ID) { + SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : + SROA(T, true, ID, ST, AT, SLT) { initializeSROA_DTPass(*PassRegistry::getPassRegistry()); } @@ -181,7 +209,8 @@ namespace { struct SROA_SSAUp : public SROA { static char ID; public: - SROA_SSAUp(int T = -1) : SROA(T, false, ID) { + SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : + SROA(T, false, ID, ST, AT, SLT) { initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); } @@ -210,10 +239,15 @@ INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, - bool UseDomTree) { + bool UseDomTree, + int StructMemberThreshold, + int ArrayElementThreshold, + int ScalarLoadThreshold) { if (UseDomTree) - return new SROA_DT(Threshold); - return new SROA_SSAUp(Threshold); + return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold, + ScalarLoadThreshold); + return new SROA_SSAUp(Threshold, StructMemberThreshold, + ArrayElementThreshold, ScalarLoadThreshold); } @@ -229,6 +263,7 @@ class ConvertToScalarInfo { /// AllocaSize - The size of the alloca being considered in bytes. 
unsigned AllocaSize; const TargetData &TD; + unsigned ScalarLoadThreshold; /// IsNotTrivial - This is set to true if there is some access to the object /// which means that mem2reg can't promote it. @@ -264,23 +299,33 @@ class ConvertToScalarInfo { /// large integers unless there is some potential for optimization. bool HadNonMemTransferAccess; + /// HadDynamicAccess - True if some element of this alloca was dynamic. + /// We don't yet have support for turning a dynamic access into a large + /// integer. + bool HadDynamicAccess; + public: - explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown), - VectorTy(0), HadNonMemTransferAccess(false) { } + explicit ConvertToScalarInfo(unsigned Size, const TargetData &td, + unsigned SLT) + : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false), + ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false), + HadDynamicAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: - bool CanConvertToScalar(Value *V, uint64_t Offset); + bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx); void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); - void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); + void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset, + Value *NonConstantIdx); Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, - uint64_t Offset, IRBuilder<> &Builder); + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, - uint64_t Offset, IRBuilder<> &Builder); + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder); }; } // end anonymous namespace. 
@@ -291,7 +336,7 @@ private: AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // If we can't convert this scalar, or if mem2reg can trivially do it, bail // out. - if (!CanConvertToScalar(AI, 0) || !IsNotTrivial) + if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial) return 0; // If an alloca has only memset / memcpy uses, it may still have an Unknown @@ -316,16 +361,27 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { NewTy = VectorTy; // Use the vector type. } else { unsigned BitWidth = AllocaSize * 8; + + // Do not convert to scalar integer if the alloca size exceeds the + // scalar load threshold. + if (BitWidth > ScalarLoadThreshold) + return 0; + if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth)) return 0; + // Dynamic accesses on integers aren't yet supported. They need us to shift + // by a dynamic amount which could be difficult to work out as we might not + // know whether to use a left or right shift. + if (ScalarKind == Integer && HadDynamicAccess) + return 0; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0); + ConvertUsesToScalar(AI, NewAI, 0, 0); return NewAI; } @@ -412,7 +468,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, /// /// If we see at least one access to the value that is as a vector type, set the /// SawVec flag. 
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { +bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, + Value* NonConstantIdx) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -442,24 +499,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { if (!onlyUsedByLifetimeMarkers(BCI)) IsNotTrivial = true; // Can't be mem2reg'd. - if (!CanConvertToScalar(BCI, Offset)) + if (!CanConvertToScalar(BCI, Offset, NonConstantIdx)) return false; continue; } if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // If this is a GEP with a variable indices, we can't handle it. - if (!GEP->hasAllConstantIndices()) + PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType()); + if (!PtrTy) return false; // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - if (!GEP->getPointerOperandType()->isPointerTy()) - return false; - uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), + Value *GEPNonConstantIdx = 0; + if (!GEP->hasAllConstantIndices()) { + if (!isa<VectorType>(PtrTy->getElementType())) + return false; + if (NonConstantIdx) + return false; + GEPNonConstantIdx = Indices.pop_back_val(); + if (!GEPNonConstantIdx->getType()->isIntegerTy(32)) + return false; + HadDynamicAccess = true; + } else + GEPNonConstantIdx = NonConstantIdx; + uint64_t GEPOffset = TD.getIndexedOffset(PtrTy, Indices); // See if all uses can be converted. - if (!CanConvertToScalar(GEP, Offset+GEPOffset)) + if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx)) return false; IsNotTrivial = true; // Can't be mem2reg'd. 
HadNonMemTransferAccess = true; @@ -469,6 +537,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // If this is a constant sized memset of a constant value (e.g. 0) we can // handle it. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { + // Store to dynamic index. + if (NonConstantIdx) + return false; // Store of constant value. if (!isa<ConstantInt>(MSI->getValue())) return false; @@ -493,6 +564,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // If this is a memcpy or memmove into or out of the whole allocation, we // can handle it like a load or store of the scalar type. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { + // Store to dynamic index. + if (NonConstantIdx) + return false; ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()); if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) return false; @@ -524,12 +598,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. By the end of this, there should be no uses of Ptr. void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, - uint64_t Offset) { + uint64_t Offset, + Value* NonConstantIdx) { while (!Ptr->use_empty()) { Instruction *User = cast<Instruction>(Ptr->use_back()); if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) { - ConvertUsesToScalar(CI, NewAI, Offset); + ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx); CI->eraseFromParent(); continue; } @@ -537,9 +612,11 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + if (!GEP->hasAllConstantIndices()) + NonConstantIdx = Indices.pop_back_val(); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx); GEP->eraseFromParent(); continue; } @@ -550,7 +627,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // The load is a bit extract from NewAI shifted right by Offset bits. Value *LoadedVal = Builder.CreateLoad(NewAI); Value *NewLoadVal - = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, + NonConstantIdx, Builder); LI->replaceAllUsesWith(NewLoadVal); LI->eraseFromParent(); continue; @@ -560,7 +638,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, assert(SI->getOperand(0) != Ptr && "Consistency error!"); Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, - Builder); + NonConstantIdx, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); @@ -575,6 +653,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // transform it into a store of the expanded constant value. 
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); + assert(!NonConstantIdx && "Cannot replace dynamic memset with insert"); int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue(); if (SNumBytes > 0 && (SNumBytes >> 32) == 0) { unsigned NumBytes = static_cast<unsigned>(SNumBytes); @@ -591,7 +670,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), - Old, Offset, Builder); + Old, Offset, 0, Builder); Builder.CreateStore(New, NewAI); // If the load we just inserted is now dead, then the memset overwrote @@ -607,6 +686,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // can handle it like a load or store of the scalar type. if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { assert(Offset == 0 && "must be store to start of alloca"); + assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert"); // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store @@ -679,7 +759,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, /// shifted to the right. Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, - uint64_t Offset, IRBuilder<> &Builder) { + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. Type *FromType = FromVal->getType(); if (FromType == ToType && Offset == 0) @@ -701,7 +782,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. 
- Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); + Value *Idx; + if (NonConstantIdx) { + if (Elt) + Idx = Builder.CreateAdd(NonConstantIdx, + Builder.getInt32(Elt), + "dyn.offset"); + else + Idx = NonConstantIdx; + } else + Idx = Builder.getInt32(Elt); + Value *V = Builder.CreateExtractElement(FromVal, Idx); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType); return V; @@ -710,23 +801,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. if (StructType *ST = dyn_cast<StructType>(ToType)) { + assert(!NonConstantIdx && + "Dynamic indexing into struct types not supported"); const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), - Builder); + 0, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + assert(!NonConstantIdx && + "Dynamic indexing into array types not supported"); uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, Builder); + Offset+i*EltSize, 0, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -792,9 +887,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, /// /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. +/// +/// NonConstantIdx is an index value if there was a GEP with a non-constant +/// index value. If this is 0 then all GEPs used to find this insert address +/// are constant. 
Value *ConvertToScalarInfo:: ConvertScalar_InsertValue(Value *SV, Value *Old, - uint64_t Offset, IRBuilder<> &Builder) { + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. Type *AllocaType = Old->getType(); @@ -815,26 +915,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, SV = Builder.CreateBitCast(SV, EltTy); uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy); unsigned Elt = Offset/EltSize; - return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); + Value *Idx; + if (NonConstantIdx) { + if (Elt) + Idx = Builder.CreateAdd(NonConstantIdx, + Builder.getInt32(Elt), + "dyn.offset"); + else + Idx = NonConstantIdx; + } else + Idx = Builder.getInt32(Elt); + return Builder.CreateInsertElement(Old, SV, Idx); } // If SV is a first-class aggregate value, insert each value recursively. if (StructType *ST = dyn_cast<StructType>(SV->getType())) { + assert(!NonConstantIdx && + "Dynamic indexing into struct types not supported"); const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), - Builder); + 0, Builder); } return Old; } if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + assert(!NonConstantIdx && + "Dynamic indexing into array types not supported"); uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder); } return Old; } @@ -1335,15 +1449,14 @@ bool SROA::performPromotion(Function &F) { /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. 
It must be a struct or array type with a small number of elements. -static bool ShouldAttemptScalarRepl(AllocaInst *AI) { +bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { Type *T = AI->getAllocatedType(); - // Do not promote any struct into more than 32 separate vars. + // Do not promote any struct that has too many members. if (StructType *ST = dyn_cast<StructType>(T)) - return ST->getNumElements() <= 32; - // Arrays are much less likely to be safe for SROA; only consider - // them if they are very small. + return ST->getNumElements() <= StructMemberThreshold; + // Do not promote any array that has too many elements. if (ArrayType *AT = dyn_cast<ArrayType>(T)) - return AT->getNumElements() <= 8; + return AT->getNumElements() <= ArrayElementThreshold; return false; } @@ -1448,8 +1561,8 @@ bool SROA::performScalarRepl(Function &F) { // promoted itself. If so, we don't want to transform it needlessly. Note // that we can't just check based on the type: the alloca may be of an i32 // but that has pointer arithmetic to set byte 3 of it or something. - if (AllocaInst *NewAI = - ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { + if (AllocaInst *NewAI = ConvertToScalarInfo( + (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) { NewAI->takeName(AI); AI->eraseFromParent(); ++NumConverted; @@ -1642,6 +1755,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); if (GEPIt == E) return; + bool NonConstant = false; + unsigned NonConstantIdxSize = 0; // Walk through the GEP type indices, checking the types that this indexes // into. @@ -1651,15 +1766,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info, GEPI); + if (!IdxVal) { + // Non constant GEPs are only a problem on arrays, structs, and pointers + // Vectors can be dynamically indexed. 
+ // FIXME: Add support for dynamic indexing on arrays. This should be + // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0] + // isn't. + if (!(*GEPIt)->isVectorTy()) + return MarkUnsafe(Info, GEPI); + NonConstant = true; + NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); + } } // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If this GEP is non constant then the last operand must have been a + // dynamic index into a vector. Pop this now as it has no impact on the + // constant part of the offset. + if (NonConstant) + Indices.pop_back(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); - if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, + NonConstantIdxSize)) MarkUnsafe(Info, GEPI); } @@ -1764,6 +1894,12 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { if (Offset >= AT->getNumElements() * EltSize) return false; Offset %= EltSize; + } else if (VectorType *VT = dyn_cast<VectorType>(T)) { + EltTy = VT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + if (Offset >= VT->getNumElements() * EltSize) + return false; + Offset %= EltSize; } else { return false; } @@ -1931,9 +2067,16 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Offset -= Layout->getElementOffset(Idx); IdxTy = Type::getInt32Ty(T->getContext()); return Idx; + } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = Offset / EltSize; + Offset -= Idx * EltSize; + IdxTy = Type::getInt64Ty(T->getContext()); + return Idx; } - ArrayType *AT = cast<ArrayType>(T); - T = AT->getElementType(); + VectorType *VT = cast<VectorType>(T); + T = VT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; Offset -= 
Idx * EltSize; @@ -1948,6 +2091,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { uint64_t OldOffset = Offset; SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If the GEP was dynamic then it must have been a dynamic vector lookup. + // In this case, it must be the last GEP operand which is dynamic so keep that + // aside until we've found the constant GEP offset then add it back in at the + // end. + Value* NonConstantIdx = 0; + if (!GEPI->hasAllConstantIndices()) + NonConstantIdx = Indices.pop_back_val(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); @@ -1974,6 +2124,17 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); } + if (NonConstantIdx) { + Type* GepTy = T; + // This GEP has a dynamic index. We need to add "i32 0" to index through + // any structs or arrays in the original type until we get to the vector + // to index. + while (!isa<VectorType>(GepTy)) { + NewArgs.push_back(Constant::getNullValue(i32Ty)); + GepTy = cast<CompositeType>(GepTy)->getTypeAtIndex(0U); + } + NewArgs.push_back(NonConstantIdx); + } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index a66b3e3825..91158b429e 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -89,7 +89,6 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// ChangeToCall - Convert the specified invoke into a normal call. 
static void ChangeToCall(InvokeInst *II) { - BasicBlock *BB = II->getParent(); SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -102,8 +101,8 @@ static void ChangeToCall(InvokeInst *II) { BranchInst::Create(II->getNormalDest(), II); // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(BB); - BB->getInstList().erase(II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); } static bool MarkAliveBlocks(BasicBlock *BB, @@ -157,11 +156,21 @@ static bool MarkAliveBlocks(BasicBlock *BB, } // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) - if (II->doesNotThrow()) { - ChangeToCall(II); + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { + ChangeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. 
+ BranchInst::Create(II->getNormalDest(), II); + II->eraseFromParent(); + } else + ChangeToCall(II); Changed = true; } + } Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 99b05389b2..39647c7fc6 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -18,20 +18,20 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! using namespace llvm; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 3859a1aec4..5576432149 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -671,12 +671,3 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, return cast<ReturnInst>(NewRet); } -/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a -/// given basic block. 
-DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { - if (const Instruction *I = BB->getFirstNonPHI()) - return I->getDebugLoc(); - // Scanning entire block may be too expensive, if the first instruction - // does not have valid location info. - return DebugLoc(); -} diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 344b860bde..27f7724417 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -12,18 +12,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" +#include "llvm/Type.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/LLVMContext.h" -#include "llvm/Intrinsics.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7f5cb5e096..4ff31cae62 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -29,3 +29,5 @@ add_llvm_library(LLVMTransformUtils Utils.cpp ValueMapper.cpp ) + +add_dependencies(LLVMTransformUtils intrinsics_gen) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 20052a4122..99237b8390 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -28,7 +29,6 @@ 
#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" #include <map> using namespace llvm; diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index a0e027b5f1..1dac6b5b8b 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -53,7 +53,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I->isConstant(), I->getLinkage(), (Constant*) 0, I->getName(), (GlobalVariable*) 0, - I->isThreadLocal(), + I->getThreadLocalMode(), I->getType()->getAddressSpace()); GV->copyAttributesFrom(I); VMap[I] = GV; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index f10dbbeef2..c545cd68c9 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -664,7 +664,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1); + TheSwitch->removeCase(ToBeRemoved); break; } } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 9f8043d6fa..89e89e7acf 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,22 +13,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include 
"llvm/Intrinsics.h" -#include "llvm/Attributes.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index b08f8e21a0..bed7d72fff 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -14,31 +14,31 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" using namespace llvm; 
//===----------------------------------------------------------------------===// @@ -169,11 +169,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. SwitchInst::CaseIt FirstCase = SI->case_begin(); - IntegersSubset CaseRanges = FirstCase.getCaseValueEx(); - if (CaseRanges.getNumItems() == 1 && CaseRanges.isSingleNumber(0)) { + IntegersSubset& Case = FirstCase.getCaseValueEx(); + if (Case.isSingleNumber()) { // FIXME: Currently work with ConstantInt based numbers. Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - CaseRanges.getItem(0).getLow().toConstantInt(), + Case.getSingleNumber(0).toConstantInt(), "cond"); // Insert the new branch. @@ -183,7 +183,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // Delete the old switch. SI->eraseFromParent(); return true; - } } return false; @@ -266,7 +265,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (extractMallocCall(I) || extractCallocCall(I)) return true; + if (isAllocLikeFn(I)) return true; if (CallInst *CI = isFreeCall(I)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index 8491c5582d..dbcf3b2fe2 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -14,8 +14,8 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 2357d81916..dd5e20ed50 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ 
b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -28,14 +28,14 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index e60a41b786..b3f5289fcd 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -190,8 +190,11 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return V; } - // Set DebugLoc. - InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // Set the DebugLoc of the inserted PHI, if available. + DebugLoc DL; + if (const Instruction *I = BB->getFirstNonPHI()) + DL = I->getDebugLoc(); + InsertedPHI->setDebugLoc(DL); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -230,28 +233,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) { U.set(V); } -/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator -/// in the SSAUpdaterImpl template. 
-namespace { - class PHIiter { - private: - PHINode *PHI; - unsigned idx; - - public: - explicit PHIiter(PHINode *P) // begin iterator - : PHI(P), idx(0) {} - PHIiter(PHINode *P, bool) // end iterator - : PHI(P), idx(PHI->getNumIncomingValues()) {} - - PHIiter &operator++() { ++idx; return *this; } - bool operator==(const PHIiter& x) const { return idx == x.idx; } - bool operator!=(const PHIiter& x) const { return !operator==(x); } - Value *getIncomingValue() { return PHI->getIncomingValue(idx); } - BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } - }; -} - /// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, /// specialized for SSAUpdater. namespace llvm { @@ -266,9 +247,26 @@ public: static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } - typedef PHIiter PHI_iterator; - static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } - static inline PHI_iterator PHI_end(PhiT *PHI) { + class PHI_iterator { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHI_iterator(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHI_iterator(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHI_iterator &operator++() { ++idx; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; + + static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3d4d50a80a..f37ea91397 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ 
b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -16,30 +16,30 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/Type.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MDBuilder.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> #include <map> @@ -129,7 +129,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { /// static bool isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, - Instruction* Cond, + Instruction *Cond, SmallVectorImpl<PHINode*> &PhiNodes) { if (SI1 == SI2) return false; // Can't merge with self! 
assert(SI1->isUnconditional() && SI2->isConditional()); @@ -156,7 +156,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, isa<PHINode>(BBI); ++BBI) { PHINode *PN = cast<PHINode>(BBI); if (PN->getIncomingValueForBlock(SI1BB) != Cond || - !isa<Constant>(PN->getIncomingValueForBlock(SI2BB))) + !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB))) return false; PhiNodes.push_back(PN); } @@ -1782,7 +1782,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - ConstantInt *PBI_C = dyn_cast<ConstantInt>( + ConstantInt *PBI_C = cast<ConstantInt>( PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); Instruction *MergedCond = 0; diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 1d08df59b3..62d23cb948 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -23,6 +23,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Pass.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -41,6 +42,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" #include <algorithm> #include <map> @@ -66,6 +68,10 @@ static cl::opt<unsigned> MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations")); +static cl::opt<bool> +Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, + cl::desc("Don't try to form non-2^n-length vectors")); + static cl::opt<unsigned> MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group")); @@ -76,6 +82,10 @@ 
MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), " a full cycle check")); static cl::opt<bool> +NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize boolean (i1) values")); + +static cl::opt<bool> NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize integer values")); @@ -104,6 +114,10 @@ NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize select instructions")); static cl::opt<bool> +NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize comparison instructions")); + +static cl::opt<bool> NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize getelementptr instructions")); @@ -182,12 +196,12 @@ namespace { // FIXME: const correct? - bool vectorizePairs(BasicBlock &BB); + bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts); + std::vector<Value *> &PairableInsts, bool NonPow2Len); void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, std::vector<Value *> &PairableInsts, @@ -211,7 +225,7 @@ namespace { bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore); + bool IsSimpleLoadStore, bool NonPow2Len); bool trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, @@ -263,26 +277,32 @@ namespace { bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool &FlipMemInputs); + Instruction *J, unsigned o, bool FlipMemInputs); void fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, - unsigned IdxOffset, 
std::vector<Constant*> &Mask); + unsigned MaskOffset, unsigned NumInElem, + unsigned NumInElem1, unsigned IdxOffset, + std::vector<Constant*> &Mask); Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, Instruction *J); + bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, + unsigned o, Value *&LOp, unsigned numElemL, + Type *ArgTypeL, Type *ArgTypeR, + unsigned IdxOff = 0); + Value *getReplacementInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, bool FlipMemInputs); void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, - bool &FlipMemInputs); + bool FlipMemInputs); void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, - Instruction *&K2, bool &FlipMemInputs); + Instruction *&K2, bool FlipMemInputs); void collectPairLoadMoveSet(BasicBlock &BB, DenseMap<Value *, Value *> &ChosenPairs, @@ -294,6 +314,10 @@ namespace { DenseMap<Value *, Value *> &ChosenPairs, std::multimap<Value *, Value *> &LoadMoveSet); + void collectPtrInfo(std::vector<Value *> &PairableInsts, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<Value *> &LowPtrInsts); + bool canMoveUsesOfIAfterJ(BasicBlock &BB, std::multimap<Value *, Value *> &LoadMoveSet, Instruction *I, Instruction *J); @@ -303,12 +327,15 @@ namespace { Instruction *&InsertionPt, Instruction *I, Instruction *J); + void combineMetadata(Instruction *K, const Instruction *J); + bool vectorizeBB(BasicBlock &BB) { bool changed = false; // Iterate a sufficient number of times to merge types of size 1 bit, // then 2 bits, then 4, etc. up to half of the target vector width of the // target vector register. 
- for (unsigned v = 2, n = 1; + unsigned n = 1; + for (unsigned v = 2; v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); v *= 2, ++n) { DEBUG(dbgs() << "BBV: fusing loop #" << n << @@ -320,6 +347,16 @@ namespace { break; } + if (changed && !Pow2LenOnly) { + ++n; + for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { + DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << + n << " for " << BB.getName() << " in " << + BB.getParent()->getName() << "...\n"); + if (!vectorizePairs(BB, true)) break; + } + } + DEBUG(dbgs() << "BBV: done!\n"); return changed; } @@ -341,15 +378,43 @@ namespace { AU.setPreservesCFG(); } - // This returns the vector type that holds a pair of the provided type. - // If the provided type is already a vector, then its length is doubled. - static inline VectorType *getVecTypeForPair(Type *ElemTy) { + static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { + assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && + "Cannot form vector from incompatible scalar types"); + Type *STy = ElemTy->getScalarType(); + + unsigned numElem; if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) { - unsigned numElem = VTy->getNumElements(); - return VectorType::get(ElemTy->getScalarType(), numElem*2); + numElem = VTy->getNumElements(); + } else { + numElem = 1; } - return VectorType::get(ElemTy, 2); + if (VectorType *VTy = dyn_cast<VectorType>(Elem2Ty)) { + numElem += VTy->getNumElements(); + } else { + numElem += 1; + } + + return VectorType::get(STy, numElem); + } + + static inline void getInstructionTypes(Instruction *I, + Type *&T1, Type *&T2) { + if (isa<StoreInst>(I)) { + // For stores, it is the value type, not the pointer type that matters + // because the value is what will come from a vector register. 
+ + Value *IVal = cast<StoreInst>(I)->getValueOperand(); + T1 = IVal->getType(); + } else { + T1 = I->getType(); + } + + if (I->isCast()) + T2 = cast<CastInst>(I)->getSrcTy(); + else + T2 = T1; } // Returns the weight associated with the provided value. A chain of @@ -385,8 +450,7 @@ namespace { // true if the offset could be determined to be some constant value. // For example, if OffsetInElmts == 1, then J accesses the memory directly // after I; if OffsetInElmts == -1 then I accesses the memory - // directly after J. This function assumes that both instructions - // have the same type. + // directly after J. bool getPairPtrInfo(Instruction *I, Instruction *J, Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, int64_t &OffsetInElmts) { @@ -418,7 +482,12 @@ namespace { Type *VTy = cast<PointerType>(IPtr->getType())->getElementType(); int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); - assert(VTy == cast<PointerType>(JPtr->getType())->getElementType()); + Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType(); + if (VTy != VTy2 && Offset < 0) { + int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); + OffsetInElmts = Offset/VTy2TSS; + return (abs64(Offset) % VTy2TSS) == 0; + } OffsetInElmts = Offset/VTyTSS; return (abs64(Offset) % VTyTSS) == 0; @@ -471,7 +540,7 @@ namespace { // This function implements one vectorization iteration on the provided // basic block. It returns true if the block is changed. 
- bool BBVectorize::vectorizePairs(BasicBlock &BB) { + bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { bool ShouldContinue; BasicBlock::iterator Start = BB.getFirstInsertionPt(); @@ -482,7 +551,7 @@ namespace { std::vector<Value *> PairableInsts; std::multimap<Value *, Value *> CandidatePairs; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, - PairableInsts); + PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; // Now we have a map of all of the pairable instructions and we need to @@ -529,6 +598,10 @@ namespace { // passes should coalesce the build/extract combinations. fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); + + // It is important to cleanup here so that future iterations of this + // function have less work to do. + (void) SimplifyInstructionsInBlock(&BB, TD); return true; } @@ -567,6 +640,9 @@ namespace { } else if (isa<SelectInst>(I)) { if (!Config.VectorizeSelect) return false; + } else if (isa<CmpInst>(I)) { + if (!Config.VectorizeCmp) + return false; } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { if (!Config.VectorizeGEP) return false; @@ -584,30 +660,22 @@ namespace { return false; Type *T1, *T2; - if (isa<StoreInst>(I)) { - // For stores, it is the value type, not the pointer type that matters - // because the value is what will come from a vector register. - - Value *IVal = cast<StoreInst>(I)->getValueOperand(); - T1 = IVal->getType(); - } else { - T1 = I->getType(); - } - - if (I->isCast()) - T2 = cast<CastInst>(I)->getSrcTy(); - else - T2 = T1; + getInstructionTypes(I, T1, T2); // Not every type can be vectorized... 
if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || !(VectorType::isValidElementType(T2) || T2->isVectorTy())) return false; - if (!Config.VectorizeInts - && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) - return false; - + if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) { + if (!Config.VectorizeBools) + return false; + } else { + if (!Config.VectorizeInts + && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + return false; + } + if (!Config.VectorizeFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) return false; @@ -623,8 +691,8 @@ namespace { T2->getScalarType()->isPointerTy())) return false; - if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 || - T2->getPrimitiveSizeInBits() > Config.VectorBits/2) + if (T1->getPrimitiveSizeInBits() >= Config.VectorBits || + T2->getPrimitiveSizeInBits() >= Config.VectorBits) return false; return true; @@ -635,36 +703,25 @@ namespace { // that I has already been determined to be vectorizable and that J is not // in the use tree of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore) { + bool IsSimpleLoadStore, bool NonPow2Len) { DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << " <-> " << *J << "\n"); // Loads and stores can be merged if they have different alignments, // but are otherwise the same. - LoadInst *LI, *LJ; - StoreInst *SI, *SJ; - if ((LI = dyn_cast<LoadInst>(I)) && (LJ = dyn_cast<LoadInst>(J))) { - if (I->getType() != J->getType()) - return false; + if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | + (NonPow2Len ? 
Instruction::CompareUsingScalarTypes : 0))) + return false; - if (LI->getPointerOperand()->getType() != - LJ->getPointerOperand()->getType() || - LI->isVolatile() != LJ->isVolatile() || - LI->getOrdering() != LJ->getOrdering() || - LI->getSynchScope() != LJ->getSynchScope()) - return false; - } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) { - if (SI->getValueOperand()->getType() != - SJ->getValueOperand()->getType() || - SI->getPointerOperand()->getType() != - SJ->getPointerOperand()->getType() || - SI->isVolatile() != SJ->isVolatile() || - SI->getOrdering() != SJ->getOrdering() || - SI->getSynchScope() != SJ->getSynchScope()) - return false; - } else if (!J->isSameOperationAs(I)) { + Type *IT1, *IT2, *JT1, *JT2; + getInstructionTypes(I, IT1, IT2); + getInstructionTypes(J, JT1, JT2); + unsigned MaxTypeBits = std::max( + IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), + IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); + if (MaxTypeBits > Config.VectorBits) return false; - } + // FIXME: handle addsub-type operations! if (IsSimpleLoadStore) { @@ -674,8 +731,11 @@ namespace { if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts) && abs64(OffsetInElmts) == 1) { if (Config.AlignedOnly) { - Type *aType = isa<StoreInst>(I) ? + Type *aTypeI = isa<StoreInst>(I) ? cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); + Type *aTypeJ = isa<StoreInst>(J) ? + cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); + // An aligned load or store is possible only if the instruction // with the lower offset has an alignment suitable for the // vector type. 
@@ -683,7 +743,7 @@ namespace { unsigned BottomAlignment = IAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment; - Type *VType = getVecTypeForPair(aType); + Type *VType = getVecTypeForPair(aTypeI, aTypeJ); unsigned VecAlignment = TD->getPrefTypeAlignment(VType); if (BottomAlignment < VecAlignment) return false; @@ -691,11 +751,6 @@ namespace { } else { return false; } - } else if (isa<ShuffleVectorInst>(I)) { - // Only merge two shuffles if they're both constant - return isa<Constant>(I->getOperand(2)) && - isa<Constant>(J->getOperand(2)); - // FIXME: We may want to vectorize non-constant shuffles also. } // The powi intrinsic is special because only the first argument is @@ -778,7 +833,7 @@ namespace { bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts) { + std::vector<Value *> &PairableInsts, bool NonPow2Len) { BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -814,7 +869,7 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. - if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue; + if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue; // J is a candidate for merging with I. if (!PairableInsts.size() || @@ -1436,24 +1491,27 @@ namespace { // instruction that fuses I with J. Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, - bool &FlipMemInputs) { + bool FlipMemInputs) { Value *IPtr, *JPtr; unsigned IAlignment, JAlignment; int64_t OffsetInElmts; + + // Note: the analysis might fail here, that is why FlipMemInputs has + // been precomputed (OffsetInElmts must be unused here). (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts); // The pointer value is taken to be the one with the lowest offset. 
Value *VPtr; - if (OffsetInElmts > 0) { + if (!FlipMemInputs) { VPtr = IPtr; } else { - FlipMemInputs = true; VPtr = JPtr; } - Type *ArgType = cast<PointerType>(IPtr->getType())->getElementType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); + Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); Type *VArgPtrType = PointerType::get(VArgType, cast<PointerType>(IPtr->getType())->getAddressSpace()); return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), @@ -1461,15 +1519,17 @@ namespace { } void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, - unsigned IdxOffset, std::vector<Constant*> &Mask) { - for (unsigned v = 0; v < NumElem/2; ++v) { + unsigned MaskOffset, unsigned NumInElem, + unsigned NumInElem1, unsigned IdxOffset, + std::vector<Constant*> &Mask) { + unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements(); + for (unsigned v = 0; v < NumElem1; ++v) { int m = cast<ShuffleVectorInst>(J)->getMaskValue(v); if (m < 0) { Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); } else { unsigned mm = m + (int) IdxOffset; - if (m >= (int) NumInElem) + if (m >= (int) NumInElem1) mm += (int) NumInElem; Mask[v+MaskOffset] = @@ -1485,8 +1545,11 @@ namespace { // This is the shuffle mask. We need to append the second // mask to the first, and the numbers need to be adjusted. - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = I->getType(); + Type *ArgTypeJ = J->getType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); + + unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements(); // Get the total number of elements in the fused vector type. 
// By definition, this must equal the number of elements in @@ -1494,19 +1557,81 @@ namespace { unsigned NumElem = cast<VectorType>(VArgType)->getNumElements(); std::vector<Constant*> Mask(NumElem); - Type *OpType = I->getOperand(0)->getType(); - unsigned NumInElem = cast<VectorType>(OpType)->getNumElements(); + Type *OpTypeI = I->getOperand(0)->getType(); + unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements(); + Type *OpTypeJ = J->getOperand(0)->getType(); + unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements(); + + // The fused vector will be: + // ----------------------------------------------------- + // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | + // ----------------------------------------------------- + // from which we'll extract NumElem total elements (where the first NumElemI + // of them come from the mask in I and the remainder come from the mask + // in J. // For the mask from the first pair... - fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask); + fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, + 0, Mask); // For the mask from the second pair... - fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem, - Mask); + fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, + NumInElemI, Mask); return ConstantVector::get(Mask); } + bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, + Instruction *J, unsigned o, Value *&LOp, + unsigned numElemL, + Type *ArgTypeL, Type *ArgTypeH, + unsigned IdxOff) { + bool ExpandedIEChain = false; + if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { + // If we have a pure insertelement chain, then this can be rewritten + // into a chain that directly builds the larger type. 
+ bool PureChain = true; + InsertElementInst *LIENext = LIE; + do { + if (!isa<UndefValue>(LIENext->getOperand(0)) && + !isa<InsertElementInst>(LIENext->getOperand(0))) { + PureChain = false; + break; + } + } while ((LIENext = + dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); + + if (PureChain) { + SmallVector<Value *, 8> VectElemts(numElemL, + UndefValue::get(ArgTypeL->getScalarType())); + InsertElementInst *LIENext = LIE; + do { + unsigned Idx = + cast<ConstantInt>(LIENext->getOperand(2))->getSExtValue(); + VectElemts[Idx] = LIENext->getOperand(1); + } while ((LIENext = + dyn_cast<InsertElementInst>(LIENext->getOperand(0)))); + + LIENext = 0; + Value *LIEPrev = UndefValue::get(ArgTypeH); + for (unsigned i = 0; i < numElemL; ++i) { + if (isa<UndefValue>(VectElemts[i])) continue; + LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], + ConstantInt::get(Type::getInt32Ty(Context), + i + IdxOff), + getReplacementName(I, true, o, i+1)); + LIENext->insertBefore(J); + LIEPrev = LIENext; + } + + LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); + ExpandedIEChain = true; + } + } + + return ExpandedIEChain; + } + // Returns the value to be used as the specified operand of the vector // instruction that fuses I with J. 
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, @@ -1514,84 +1639,333 @@ namespace { Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); - // Compute the fused vector type for this operand - Type *ArgType = I->getOperand(o)->getType(); - VectorType *VArgType = getVecTypeForPair(ArgType); + // Compute the fused vector type for this operand + Type *ArgTypeI = I->getOperand(o)->getType(); + Type *ArgTypeJ = J->getOperand(o)->getType(); + VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); Instruction *L = I, *H = J; + Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; if (FlipMemInputs) { L = J; H = I; + ArgTypeL = ArgTypeJ; + ArgTypeH = ArgTypeI; } - if (ArgType->isVectorTy()) { - unsigned numElem = cast<VectorType>(VArgType)->getNumElements(); - std::vector<Constant*> Mask(numElem); - for (unsigned v = 0; v < numElem; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + unsigned numElemL; + if (ArgTypeL->isVectorTy()) + numElemL = cast<VectorType>(ArgTypeL)->getNumElements(); + else + numElemL = 1; - Instruction *BV = new ShuffleVectorInst(L->getOperand(o), - H->getOperand(o), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); - BV->insertBefore(J); - return BV; + unsigned numElemH; + if (ArgTypeH->isVectorTy()) + numElemH = cast<VectorType>(ArgTypeH)->getNumElements(); + else + numElemH = 1; + + Value *LOp = L->getOperand(o); + Value *HOp = H->getOperand(o); + unsigned numElem = VArgType->getNumElements(); + + // First, we check if we can reuse the "original" vector outputs (if these + // exist). We might need a shuffle. 
+ ExtractElementInst *LEE = dyn_cast<ExtractElementInst>(LOp); + ExtractElementInst *HEE = dyn_cast<ExtractElementInst>(HOp); + ShuffleVectorInst *LSV = dyn_cast<ShuffleVectorInst>(LOp); + ShuffleVectorInst *HSV = dyn_cast<ShuffleVectorInst>(HOp); + + // FIXME: If we're fusing shuffle instructions, then we can't apply this + // optimization. The input vectors to the shuffle might be a different + // length from the shuffle outputs. Unfortunately, the replacement + // shuffle mask has already been formed, and the mask entries are sensitive + // to the sizes of the inputs. + bool IsSizeChangeShuffle = + isa<ShuffleVectorInst>(L) && + (LOp->getType() != L->getType() || HOp->getType() != H->getType()); + + if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { + // We can have at most two unique vector inputs. + bool CanUseInputs = true; + Value *I1, *I2 = 0; + if (LEE) { + I1 = LEE->getOperand(0); + } else { + I1 = LSV->getOperand(0); + I2 = LSV->getOperand(1); + if (I2 == I1 || isa<UndefValue>(I2)) + I2 = 0; + } + + if (HEE) { + Value *I3 = HEE->getOperand(0); + if (!I2 && I3 != I1) + I2 = I3; + else if (I3 != I1 && I3 != I2) + CanUseInputs = false; + } else { + Value *I3 = HSV->getOperand(0); + if (!I2 && I3 != I1) + I2 = I3; + else if (I3 != I1 && I3 != I2) + CanUseInputs = false; + + if (CanUseInputs) { + Value *I4 = HSV->getOperand(1); + if (!isa<UndefValue>(I4)) { + if (!I2 && I4 != I1) + I2 = I4; + else if (I4 != I1 && I4 != I2) + CanUseInputs = false; + } + } + } + + if (CanUseInputs) { + unsigned LOpElem = + cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType()) + ->getNumElements(); + unsigned HOpElem = + cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType()) + ->getNumElements(); + + // We have one or two input vectors. We need to map each index of the + // operands to the index of the original vector. 
+ SmallVector<std::pair<int, int>, 8> II(numElem); + for (unsigned i = 0; i < numElemL; ++i) { + int Idx, INum; + if (LEE) { + Idx = + cast<ConstantInt>(LEE->getOperand(1))->getSExtValue(); + INum = LEE->getOperand(0) == I1 ? 0 : 1; + } else { + Idx = LSV->getMaskValue(i); + if (Idx < (int) LOpElem) { + INum = LSV->getOperand(0) == I1 ? 0 : 1; + } else { + Idx -= LOpElem; + INum = LSV->getOperand(1) == I1 ? 0 : 1; + } + } + + II[i] = std::pair<int, int>(Idx, INum); + } + for (unsigned i = 0; i < numElemH; ++i) { + int Idx, INum; + if (HEE) { + Idx = + cast<ConstantInt>(HEE->getOperand(1))->getSExtValue(); + INum = HEE->getOperand(0) == I1 ? 0 : 1; + } else { + Idx = HSV->getMaskValue(i); + if (Idx < (int) HOpElem) { + INum = HSV->getOperand(0) == I1 ? 0 : 1; + } else { + Idx -= HOpElem; + INum = HSV->getOperand(1) == I1 ? 0 : 1; + } + } + + II[i + numElemL] = std::pair<int, int>(Idx, INum); + } + + // We now have an array which tells us from which index of which + // input vector each element of the operand comes. + VectorType *I1T = cast<VectorType>(I1->getType()); + unsigned I1Elem = I1T->getNumElements(); + + if (!I2) { + // In this case there is only one underlying vector input. Check for + // the trivial case where we can use the input directly. + if (I1Elem == numElem) { + bool ElemInOrder = true; + for (unsigned i = 0; i < numElem; ++i) { + if (II[i].first != (int) i && II[i].first != -1) { + ElemInOrder = false; + break; + } + } + + if (ElemInOrder) + return I1; + } + + // A shuffle is needed. 
+ std::vector<Constant *> Mask(numElem); + for (unsigned i = 0; i < numElem; ++i) { + int Idx = II[i].first; + if (Idx == -1) + Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); + else + Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + + Instruction *S = + new ShuffleVectorInst(I1, UndefValue::get(I1T), + ConstantVector::get(Mask), + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } + + VectorType *I2T = cast<VectorType>(I2->getType()); + unsigned I2Elem = I2T->getNumElements(); + + // This input comes from two distinct vectors. The first step is to + // make sure that both vectors are the same length. If not, the + // smaller one will need to grow before they can be shuffled together. + if (I1Elem < I2Elem) { + std::vector<Constant *> Mask(I2Elem); + unsigned v = 0; + for (; v < I1Elem; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < I2Elem; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + Instruction *NewI1 = + new ShuffleVectorInst(I1, UndefValue::get(I1T), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + NewI1->insertBefore(J); + I1 = NewI1; + I1T = I2T; + I1Elem = I2Elem; + } else if (I1Elem > I2Elem) { + std::vector<Constant *> Mask(I1Elem); + unsigned v = 0; + for (; v < I2Elem; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < I1Elem; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + Instruction *NewI2 = + new ShuffleVectorInst(I2, UndefValue::get(I2T), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + NewI2->insertBefore(J); + I2 = NewI2; + I2T = I1T; + I2Elem = I1Elem; + } + + // Now that both I1 and I2 are the same length we can shuffle them + // together (and use the result). 
+ std::vector<Constant *> Mask(numElem); + for (unsigned v = 0; v < numElem; ++v) { + if (II[v].first == -1) { + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + } else { + int Idx = II[v].first + II[v].second * I1Elem; + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + } + + Instruction *NewOp = + new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), + getReplacementName(I, true, o)); + NewOp->insertBefore(J); + return NewOp; + } } - // If these two inputs are the output of another vector instruction, - // then we should use that output directly. It might be necessary to - // permute it first. [When pairings are fused recursively, you can - // end up with cases where a large vector is decomposed into scalars - // using extractelement instructions, then built into size-2 - // vectors using insertelement and the into larger vectors using - // shuffles. InstCombine does not simplify all of these cases well, - // and so we make sure that shuffles are generated here when possible. 
- ExtractElementInst *LEE - = dyn_cast<ExtractElementInst>(L->getOperand(o)); - ExtractElementInst *HEE - = dyn_cast<ExtractElementInst>(H->getOperand(o)); - - if (LEE && HEE && - LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) { - VectorType *EEType = cast<VectorType>(LEE->getOperand(0)->getType()); - unsigned LowIndx = cast<ConstantInt>(LEE->getOperand(1))->getZExtValue(); - unsigned HighIndx = cast<ConstantInt>(HEE->getOperand(1))->getZExtValue(); - if (LEE->getOperand(0) == HEE->getOperand(0)) { - if (LowIndx == 0 && HighIndx == 1) - return LEE->getOperand(0); - - std::vector<Constant*> Mask(2); - Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); - Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); - - Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), - UndefValue::get(EEType), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); - BV->insertBefore(J); - return BV; + Type *ArgType = ArgTypeL; + if (numElemL < numElemH) { + if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, + ArgTypeL, VArgType, 1)) { + // This is another short-circuit case: we're combining a scalar into + // a vector that is formed by an IE chain. We've just expanded the IE + // chain, now insert the scalar and we're done. + + Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, + ArgTypeH)) { + // The two vector inputs to the shuffle must be the same length, + // so extend the smaller vector to be the same length as the larger one. 
+ Instruction *NLOp; + if (numElemL > 1) { + + std::vector<Constant *> Mask(numElemH); + unsigned v = 0; + for (; v < numElemL; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < numElemH; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + } else { + NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, + getReplacementName(I, true, o, 1)); + } + + NLOp->insertBefore(J); + LOp = NLOp; } - std::vector<Constant*> Mask(2); - HighIndx += EEType->getNumElements(); - Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); - Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); + ArgType = ArgTypeH; + } else if (numElemL > numElemH) { + if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, + ArgTypeH, VArgType)) { + Instruction *S = + InsertElementInst::Create(LOp, HOp, + ConstantInt::get(Type::getInt32Ty(Context), + numElemL), + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, + ArgTypeL)) { + Instruction *NHOp; + if (numElemH > 1) { + std::vector<Constant *> Mask(numElemL); + unsigned v = 0; + for (; v < numElemH; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < numElemL; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + } else { + NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, + getReplacementName(I, true, o, 1)); + } + + NHOp->insertBefore(J); + HOp = NHOp; + } + } - Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), - HEE->getOperand(0), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); + if (ArgType->isVectorTy()) { + unsigned numElem = 
cast<VectorType>(VArgType)->getNumElements(); + std::vector<Constant*> Mask(numElem); + for (unsigned v = 0; v < numElem; ++v) { + unsigned Idx = v; + // If the low vector was expanded, we need to skip the extra + // undefined entries. + if (v >= numElemL && numElemH > numElemL) + Idx += (numElemH - numElemL); + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + + Instruction *BV = new ShuffleVectorInst(LOp, HOp, + ConstantVector::get(Mask), + getReplacementName(I, true, o)); BV->insertBefore(J); return BV; } Instruction *BV1 = InsertElementInst::Create( - UndefValue::get(VArgType), - L->getOperand(o), CV0, + UndefValue::get(VArgType), LOp, CV0, getReplacementName(I, true, o, 1)); BV1->insertBefore(I); - Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o), - CV1, + Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, getReplacementName(I, true, o, 2)); BV2->insertBefore(J); return BV2; @@ -1602,8 +1976,7 @@ namespace { void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, - bool &FlipMemInputs) { - FlipMemInputs = false; + bool FlipMemInputs) { unsigned NumOperands = I->getNumOperands(); for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { @@ -1622,10 +1995,10 @@ namespace { BasicBlock &BB = *I->getParent(); Module *M = BB.getParent()->getParent(); - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = I->getType(); + Type *ArgTypeJ = J->getType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - // FIXME: is it safe to do this here? 
ReplacedOperands[o] = Intrinsic::getDeclaration(M, (Intrinsic::ID) IID, VArgType); continue; @@ -1654,36 +2027,60 @@ namespace { Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, Instruction *&K2, - bool &FlipMemInputs) { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); - + bool FlipMemInputs) { if (isa<StoreInst>(I)) { AA->replaceWithNewValue(I, K); AA->replaceWithNewValue(J, K); } else { Type *IType = I->getType(); - Type *VType = getVecTypeForPair(IType); + Type *JType = J->getType(); + + VectorType *VType = getVecTypeForPair(IType, JType); + unsigned numElem = VType->getNumElements(); + + unsigned numElemI, numElemJ; + if (IType->isVectorTy()) + numElemI = cast<VectorType>(IType)->getNumElements(); + else + numElemI = 1; + + if (JType->isVectorTy()) + numElemJ = cast<VectorType>(JType)->getNumElements(); + else + numElemJ = 1; if (IType->isVectorTy()) { - unsigned numElem = cast<VectorType>(IType)->getNumElements(); - std::vector<Constant*> Mask1(numElem), Mask2(numElem); - for (unsigned v = 0; v < numElem; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v); - } + std::vector<Constant*> Mask1(numElemI), Mask2(numElemI); + for (unsigned v = 0; v < numElemI; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); + } - K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? Mask2 : Mask1), - getReplacementName(K, false, 1)); - K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? Mask1 : Mask2), - getReplacementName(K, false, 2)); + K1 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get( + FlipMemInputs ? 
Mask2 : Mask1), + getReplacementName(K, false, 1)); } else { + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0, getReplacementName(K, false, 1)); + } + + if (JType->isVectorTy()) { + std::vector<Constant*> Mask1(numElemJ), Mask2(numElemJ); + for (unsigned v = 0; v < numElemJ; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); + } + + K2 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get( + FlipMemInputs ? Mask1 : Mask2), + getReplacementName(K, false, 2)); + } else { + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1, getReplacementName(K, false, 2)); } @@ -1784,6 +2181,61 @@ namespace { } } + // As with the aliasing information, SCEV can also change because of + // vectorization. This information is used to compute relative pointer + // offsets; the necessary information will be cached here prior to + // fusion. 
+ void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<Value *> &LowPtrInsts) { + for (std::vector<Value *>::iterator PI = PairableInsts.begin(), + PIE = PairableInsts.end(); PI != PIE; ++PI) { + DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); + if (P == ChosenPairs.end()) continue; + + Instruction *I = cast<Instruction>(P->first); + Instruction *J = cast<Instruction>(P->second); + + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + continue; + + Value *IPtr, *JPtr; + unsigned IAlignment, JAlignment; + int64_t OffsetInElmts; + if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, + OffsetInElmts) || abs64(OffsetInElmts) != 1) + llvm_unreachable("Pre-fusion pointer analysis failed"); + + Value *LowPI = (OffsetInElmts > 0) ? I : J; + LowPtrInsts.insert(LowPI); + } + } + + // When the first instruction in each pair is cloned, it will inherit its + // parent's metadata. This metadata must be combined with that of the other + // instruction in a safe way. 
+ void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { + SmallVector<std::pair<unsigned, MDNode*>, 4> Metadata; + K->getAllMetadataOtherThanDebugLoc(Metadata); + for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { + unsigned Kind = Metadata[i].first; + MDNode *JMD = J->getMetadata(Kind); + MDNode *KMD = Metadata[i].second; + + switch (Kind) { + default: + K->setMetadata(Kind, 0); // Remove unknown metadata + break; + case LLVMContext::MD_tbaa: + K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); + break; + case LLVMContext::MD_fpmath: + K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); + break; + } + } + } + // This function fuses the chosen instruction pairs into vector instructions, // taking care preserve any needed scalar outputs and, then, it reorders the // remaining instructions as needed (users of the first member of the pair @@ -1810,6 +2262,9 @@ namespace { std::multimap<Value *, Value *> LoadMoveSet; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); + DenseSet<Value *> LowPtrInsts; + collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); + DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { @@ -1849,7 +2304,10 @@ namespace { continue; } - bool FlipMemInputs; + bool FlipMemInputs = false; + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); + unsigned NumOperands = I->getNumOperands(); SmallVector<Value *, 3> ReplacedOperands(NumOperands); getReplacementInputsForPair(Context, I, J, ReplacedOperands, @@ -1861,7 +2319,9 @@ namespace { if (I->hasName()) K->takeName(I); if (!isa<StoreInst>(K)) - K->mutateType(getVecTypeForPair(I->getType())); + K->mutateType(getVecTypeForPair(I->getType(), J->getType())); + + combineMetadata(K, J); for (unsigned o = 0; o < NumOperands; ++o) K->setOperand(o, ReplacedOperands[o]); @@ -1953,6 +2413,7 @@ llvm::vectorizeBasicBlock(Pass *P, 
BasicBlock &BB, const VectorizeConfig &C) { //===----------------------------------------------------------------------===// VectorizeConfig::VectorizeConfig() { VectorBits = ::VectorBits; + VectorizeBools = !::NoBools; VectorizeInts = !::NoInts; VectorizeFloats = !::NoFloats; VectorizePointers = !::NoPointers; @@ -1960,6 +2421,7 @@ VectorizeConfig::VectorizeConfig() { VectorizeMath = !::NoMath; VectorizeFMA = !::NoFMA; VectorizeSelect = !::NoSelect; + VectorizeCmp = !::NoCmp; VectorizeGEP = !::NoGEP; VectorizeMemOps = !::NoMemOps; AlignedOnly = ::AlignedOnly; @@ -1969,6 +2431,7 @@ VectorizeConfig::VectorizeConfig() { SplatBreaksChain = ::SplatBreaksChain; MaxInsts = ::MaxInsts; MaxIter = ::MaxIter; + Pow2LenOnly = ::Pow2LenOnly; NoMemOpBoost = ::NoMemOpBoost; FastDep = ::FastDep; } diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt index 4b6693015c..06cf1e4e53 100644 --- a/lib/Transforms/Vectorize/CMakeLists.txt +++ b/lib/Transforms/Vectorize/CMakeLists.txt @@ -2,3 +2,5 @@ add_llvm_library(LLVMVectorize BBVectorize.cpp Vectorize.cpp ) + +add_dependencies(LLVMVectorize intrinsics_gen) diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 4a7fde1905..99e2687bbe 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -20,6 +20,7 @@ #include "llvm/LLVMContext.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" @@ -1376,6 +1377,26 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } } +static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM, + formatted_raw_ostream &Out) { + switch (TLM) { + case GlobalVariable::NotThreadLocal: + break; + case GlobalVariable::GeneralDynamicTLSModel: + Out << "thread_local "; + break; + case GlobalVariable::LocalDynamicTLSModel: + Out << "thread_local(localdynamic) "; + break; + case 
GlobalVariable::InitialExecTLSModel: + Out << "thread_local(initialexec) "; + break; + case GlobalVariable::LocalExecTLSModel: + Out << "thread_local(localexec) "; + break; + } +} + void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->isMaterializable()) Out << "; Materializable\n"; @@ -1388,8 +1409,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { PrintLinkage(GV->getLinkage(), Out); PrintVisibility(GV->getVisibility(), Out); + PrintThreadLocalModel(GV->getThreadLocalMode(), Out); - if (GV->isThreadLocal()) Out << "thread_local "; if (unsigned AddressSpace = GV->getType()->getAddressSpace()) Out << "addrspace(" << AddressSpace << ") "; if (GV->hasUnnamedAddr()) Out << "unnamed_addr "; @@ -2011,20 +2032,21 @@ static void WriteMDNodeComment(const MDNode *Node, formatted_raw_ostream &Out) { if (Node->getNumOperands() < 1) return; - ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0)); - if (!CI) return; - APInt Val = CI->getValue(); - APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask); - if (Val.ult(LLVMDebugVersion11)) + + Value *Op = Node->getOperand(0); + if (!Op || !isa<ConstantInt>(Op) || cast<ConstantInt>(Op)->getBitWidth() < 32) return; + DIDescriptor Desc(Node); + if (Desc.getVersion() < LLVMDebugVersion11) + return; + + unsigned Tag = Desc.getTag(); Out.PadToColumn(50); if (Tag == dwarf::DW_TAG_user_base) Out << "; [ DW_TAG_user_base ]"; - else if (Tag.isIntN(32)) { - if (const char *TagName = dwarf::TagString(Tag.getZExtValue())) - Out << "; [ " << TagName << " ]"; - } + else if (const char *TagName = dwarf::TagString(Tag)) + Out << "; [ " << TagName << " ]"; } void AssemblyWriter::writeAllMDNodes() { diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 5c2d63bc7c..1defd20728 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -14,14 +14,14 @@ #include "llvm/AutoUpgrade.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include 
"llvm/IRBuilder.h" #include "llvm/Instruction.h" +#include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include <cstring> using namespace llvm; diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index e1efcdadc7..648ccbdf38 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -8,7 +8,9 @@ add_llvm_library(LLVMCore ConstantFold.cpp Constants.cpp Core.cpp + DebugInfo.cpp DebugLoc.cpp + DIBuilder.cpp Dominators.cpp Function.cpp GCOV.cpp @@ -36,3 +38,14 @@ add_llvm_library(LLVMCore ValueTypes.cpp Verifier.cpp ) + +# Workaround: It takes over 20 minutes to compile with msvc10. +# FIXME: Suppressing optimizations to core libraries would not be good thing. +if( MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE Function.cpp + PROPERTY COMPILE_FLAGS "/Og-" + ) +endif() + +add_dependencies(LLVMCore intrinsics_gen) diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 30d8a9b12f..972db3cb86 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1210,7 +1210,7 @@ LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, unsigned AddressSpace) { return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, GlobalValue::ExternalLinkage, 0, Name, 0, - false, AddressSpace)); + GlobalVariable::NotThreadLocal, AddressSpace)); } LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) { diff --git a/lib/Analysis/DIBuilder.cpp b/lib/VMCore/DIBuilder.cpp index 4fe0fc26f1..9e7adcf750 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/VMCore/DIBuilder.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DIBuilder.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DIBuilder.h" #include 
"llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Analysis/DebugInfo.cpp b/lib/VMCore/DebugInfo.cpp index 194aed8b9f..78df0aa630 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/VMCore/DebugInfo.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Intrinsics.h" @@ -503,28 +503,28 @@ bool DINameSpace::Verify() const { uint64_t DIDerivedType::getOriginalTypeSize() const { unsigned Tag = getTag(); - if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || - Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_restrict_type) { - DIType BaseType = getTypeDerivedFrom(); - // If this type is not derived from any type then take conservative - // approach. - if (!BaseType.isValid()) - return getSizeInBits(); - // If this is a derived type, go ahead and get the base type, unless - // it's a reference then it's just the size of the field. Pointer types - // have no need of this since they're a different type of qualification - // on the type. - if (BaseType.getTag() == dwarf::DW_TAG_reference_type || - BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) - return getSizeInBits(); - else if (BaseType.isDerivedType()) - return DIDerivedType(BaseType).getOriginalTypeSize(); - else - return BaseType.getSizeInBits(); - } + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return getSizeInBits(); + + DIType BaseType = getTypeDerivedFrom(); + + // If this type is not derived from any type then take conservative approach. 
+ if (!BaseType.isValid()) + return getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return getSizeInBits(); + + if (BaseType.isDerivedType()) + return DIDerivedType(BaseType).getOriginalTypeSize(); - return getSizeInBits(); + return BaseType.getSizeInBits(); } /// getObjCProperty - Return property node, if this ivar is associated with one. @@ -542,7 +542,7 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) { return false; // This variable is not inlined function argument if its scope // does not describe current function. - return !(DISubprogram(getContext()).describes(CurFn)); + return !DISubprogram(getContext()).describes(CurFn); } /// describes - Return true if this subprogram provides debugging @@ -664,257 +664,6 @@ DIArray DICompileUnit::getGlobalVariables() const { return DIArray(); } -//===----------------------------------------------------------------------===// -// DIDescriptor: vtable anchors for all descriptors. -//===----------------------------------------------------------------------===// - -void DIScope::anchor() { } - -void DICompileUnit::anchor() { } - -void DIFile::anchor() { } - -void DIType::anchor() { } - -void DIBasicType::anchor() { } - -void DIDerivedType::anchor() { } - -void DICompositeType::anchor() { } - -void DISubprogram::anchor() { } - -void DILexicalBlock::anchor() { } - -void DINameSpace::anchor() { } - -void DILexicalBlockFile::anchor() { } - -//===----------------------------------------------------------------------===// -// DIDescriptor: dump routines for all descriptors. -//===----------------------------------------------------------------------===// - - -/// print - Print descriptor. 
-void DIDescriptor::print(raw_ostream &OS) const { - OS << "[" << dwarf::TagString(getTag()) << "] "; - OS.write_hex((intptr_t) &*DbgNode) << ']'; -} - -/// print - Print compile unit. -void DICompileUnit::print(raw_ostream &OS) const { - if (getLanguage()) - OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; - - OS << " [" << getDirectory() << "/" << getFilename() << "]"; -} - -/// print - Print type. -void DIType::print(raw_ostream &OS) const { - if (!DbgNode) return; - - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" - << "line " << getLineNumber() << ", " - << getSizeInBits() << " bits, " - << getAlignInBits() << " bit alignment, " - << getOffsetInBits() << " bit offset" - << "] "; - - if (isPrivate()) - OS << " [private] "; - else if (isProtected()) - OS << " [protected] "; - - if (isForwardDecl()) - OS << " [fwd] "; - - if (isBasicType()) - DIBasicType(DbgNode).print(OS); - else if (isDerivedType()) { - DIDerivedType DTy = DIDerivedType(DbgNode); - DTy.print(OS); - DICompositeType CTy = getDICompositeType(DTy); - if (CTy.Verify()) - CTy.print(OS); - } - else if (isCompositeType()) - DICompositeType(DbgNode).print(OS); - else { - OS << "Invalid DIType\n"; - return; - } - - OS << "\n"; -} - -/// print - Print basic type. -void DIBasicType::print(raw_ostream &OS) const { - OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; -} - -/// print - Print derived type. -void DIDerivedType::print(raw_ostream &OS) const { - OS << "\n\t Derived From: "; - getTypeDerivedFrom().print(OS); - OS << "\n\t"; -} - -/// print - Print composite type. -void DICompositeType::print(raw_ostream &OS) const { - DIArray A = getTypeArray(); - OS << " [" << A.getNumElements() << " elements]"; -} - -/// print - Print subprogram. 
-void DISubprogram::print(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" << getLineNumber() << "] "; - - if (isLocalToUnit()) - OS << " [local] "; - - if (isDefinition()) - OS << " [def] "; - - if (getScopeLineNumber() != getLineNumber()) - OS << " [Scope: " << getScopeLineNumber() << "] "; - - OS << "\n"; -} - -/// print - Print global variable. -void DIGlobalVariable::print(raw_ostream &OS) const { - OS << " ["; - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" << getLineNumber() << "] "; - - if (isLocalToUnit()) - OS << " [local] "; - - if (isDefinition()) - OS << " [def] "; - - if (isGlobalVariable()) - DIGlobalVariable(DbgNode).print(OS); - OS << "]\n"; -} - -static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, - const LLVMContext &Ctx) { - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. 
- if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, CommentOS, Ctx); - CommentOS << " ]"; - } - } -} - -void DIVariable::printExtendedName(raw_ostream &OS) const { - const LLVMContext &Ctx = DbgNode->getContext(); - StringRef Res = getName(); - if (!Res.empty()) - OS << Res << "," << getLineNumber(); - if (MDNode *InlinedAt = getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { - OS << " @["; - printDebugLoc(InlinedAtDL, OS, Ctx); - OS << "]"; - } - } -} - -/// print - Print variable. -void DIVariable::print(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - OS << " [" << getLineNumber() << "] "; - getType().print(OS); - OS << "\n"; - - // FIXME: Dump complex addresses -} - -/// dump - Print descriptor to dbgs() with a newline. -void DIDescriptor::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print compile unit to dbgs() with a newline. -void DICompileUnit::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print type to dbgs() with a newline. -void DIType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print basic type to dbgs() with a newline. -void DIBasicType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print derived type to dbgs() with a newline. -void DIDerivedType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print composite type to dbgs() with a newline. -void DICompositeType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print subprogram to dbgs() with a newline. 
-void DISubprogram::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print global variable. -void DIGlobalVariable::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print variable. -void DIVariable::dump() const { - print(dbgs()); dbgs() << '\n'; -} - /// fixupObjcLikeName - Replace contains special characters used /// in a typical Objective-C names with '.' in a given string. static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { @@ -990,6 +739,45 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { return DIVariable(MDNode::get(VMContext, Elts)); } +/// getDISubprogram - Find subprogram that is enclosing this scope. +DISubprogram llvm::getDISubprogram(const MDNode *Scope) { + DIDescriptor D(Scope); + if (D.isSubprogram()) + return DISubprogram(Scope); + + if (D.isLexicalBlockFile()) + return getDISubprogram(DILexicalBlockFile(Scope).getContext()); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(Scope).getContext()); + + return DISubprogram(); +} + +/// getDICompositeType - Find underlying composite type. +DICompositeType llvm::getDICompositeType(DIType T) { + if (T.isCompositeType()) + return DICompositeType(T); + + if (T.isDerivedType()) + return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); + + return DICompositeType(); +} + +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. +bool llvm::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext()); + return false; +} + //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. 
//===----------------------------------------------------------------------===// @@ -1192,42 +980,179 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) { return true; } -/// getDISubprogram - Find subprogram that is enclosing this scope. -DISubprogram llvm::getDISubprogram(const MDNode *Scope) { - DIDescriptor D(Scope); - if (D.isSubprogram()) - return DISubprogram(Scope); +//===----------------------------------------------------------------------===// +// DIDescriptor: dump routines for all descriptors. +//===----------------------------------------------------------------------===// - if (D.isLexicalBlockFile()) - return getDISubprogram(DILexicalBlockFile(Scope).getContext()); - - if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(Scope).getContext()); +/// dump - Print descriptor to dbgs() with a newline. +void DIDescriptor::dump() const { + print(dbgs()); dbgs() << '\n'; +} - return DISubprogram(); +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + if (!DbgNode) return; + + if (const char *Tag = dwarf::TagString(getTag())) + OS << "[ " << Tag << " ]"; + + if (this->isSubrange()) { + DISubrange(DbgNode).printInternal(OS); + } else if (this->isScope()) { + DIScope(DbgNode).printInternal(OS); + } else if (this->isCompileUnit()) { + DICompileUnit(DbgNode).printInternal(OS); + } else if (this->isFile()) { + DIFile(DbgNode).printInternal(OS); + } else if (this->isEnumerator()) { + DIEnumerator(DbgNode).printInternal(OS); + } else if (this->isBasicType()) { + DIType(DbgNode).printInternal(OS); + } else if (this->isDerivedType()) { + DIDerivedType(DbgNode).printInternal(OS); + } else if (this->isCompositeType()) { + DICompositeType(DbgNode).printInternal(OS); + } else if (this->isSubprogram()) { + DISubprogram(DbgNode).printInternal(OS); + } else if (this->isGlobalVariable()) { + DIGlobalVariable(DbgNode).printInternal(OS); + } else if (this->isVariable()) { + DIVariable(DbgNode).printInternal(OS); + } } -/// 
getDICompositeType - Find underlying composite type. -DICompositeType llvm::getDICompositeType(DIType T) { - if (T.isCompositeType()) - return DICompositeType(T); +void DISubrange::printInternal(raw_ostream &OS) const { + OS << " [" << getLo() << ", " << getHi() << ']'; +} - if (T.isDerivedType()) - return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); +void DIScope::printInternal(raw_ostream &OS) const { + OS << " [" << getDirectory() << "/" << getFilename() << ']'; +} - return DICompositeType(); +void DICompileUnit::printInternal(raw_ostream &OS) const { + DIScope::printInternal(OS); + if (unsigned Lang = getLanguage()) + OS << " [" << dwarf::LanguageString(Lang) << ']'; } -/// isSubprogramContext - Return true if Context is either a subprogram -/// or another context nested inside a subprogram. -bool llvm::isSubprogramContext(const MDNode *Context) { - if (!Context) - return false; - DIDescriptor D(Context); - if (D.isSubprogram()) - return true; - if (D.isType()) - return isSubprogramContext(DIType(Context).getContext()); - return false; +void DIEnumerator::printInternal(raw_ostream &OS) const { + OS << " [" << getName() << " :: " << getEnumValue() << ']'; } +void DIType::printInternal(raw_ostream &OS) const { + if (!DbgNode) return; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "]"; + + // TODO: Print context? 
+ + OS << " [line " << getLineNumber() + << ", size " << getSizeInBits() + << ", align " << getAlignInBits() + << ", offset " << getOffsetInBits(); + if (isBasicType()) + if (const char *Enc = + dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding())) + OS << ", enc " << Enc; + OS << "]"; + + if (isPrivate()) + OS << " [private]"; + else if (isProtected()) + OS << " [protected]"; + + if (isForwardDecl()) + OS << " [fwd]"; +} + +void DIDerivedType::printInternal(raw_ostream &OS) const { + DIType::printInternal(OS); + OS << " [from " << getTypeDerivedFrom().getName() << ']'; +} + +void DICompositeType::printInternal(raw_ostream &OS) const { + DIType::printInternal(OS); + DIArray A = getTypeArray(); + OS << " [" << A.getNumElements() << " elements]"; +} + +void DISubprogram::printInternal(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; + + // TODO : Print context + + OS << " [line " << getLineNumber() << ']'; + + if (isLocalToUnit()) + OS << " [local]"; + + if (isDefinition()) + OS << " [def]"; + + if (getScopeLineNumber() != getLineNumber()) + OS << " [scope " << getScopeLineNumber() << "]"; +} + +void DIGlobalVariable::printInternal(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; + + OS << " [line " << getLineNumber() << ']'; + + // TODO : Print context + + if (isLocalToUnit()) + OS << " [local]"; + + if (isDefinition()) + OS << " [def]"; +} + +void DIVariable::printInternal(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; + + OS << " [line " << getLineNumber() << ']'; +} + +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. 
+ if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; + } + } +} + +void DIVariable::printExtendedName(raw_ostream &OS) const { + const LLVMContext &Ctx = DbgNode->getContext(); + StringRef Res = getName(); + if (!Res.empty()) + OS << Res << "," << getLineNumber(); + if (MDNode *InlinedAt = getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 726ea5bbb0..ad7a872b1f 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -192,12 +192,12 @@ bool GlobalValue::isDeclaration() const { GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, - bool ThreadLocal, unsigned AddressSpace) - : GlobalValue(PointerType::get(Ty, AddressSpace), + ThreadLocalMode TLMode, unsigned AddressSpace) + : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits<GlobalVariable>::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) { + isConstantGlobal(constant), threadLocalMode(TLMode) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -210,13 +210,13 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, - GlobalVariable *Before, bool ThreadLocal, + GlobalVariable *Before, ThreadLocalMode TLMode, 
unsigned AddressSpace) - : GlobalValue(PointerType::get(Ty, AddressSpace), + : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits<GlobalVariable>::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) { + isConstantGlobal(constant), threadLocalMode(TLMode) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index b45923489a..2edf698a0e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/IRBuilder.h" -#include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" using namespace llvm; @@ -28,7 +28,7 @@ Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, - StrConstant, "", 0, false); + StrConstant); GV->setName(Name); GV->setUnnamedAddr(true); return GV; diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index faa99db4fc..66379a0493 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -240,27 +240,38 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { // isSameOperationAs // This should be kept in sync with isEquivalentOperation in // lib/Transforms/IPO/MergeFunctions.cpp. 
-bool Instruction::isSameOperationAs(const Instruction *I) const { +bool Instruction::isSameOperationAs(const Instruction *I, + unsigned flags) const { + bool IgnoreAlignment = flags & CompareIgnoringAlignment; + bool UseScalarTypes = flags & CompareUsingScalarTypes; + if (getOpcode() != I->getOpcode() || getNumOperands() != I->getNumOperands() || - getType() != I->getType()) + (UseScalarTypes ? + getType()->getScalarType() != I->getType()->getScalarType() : + getType() != I->getType())) return false; // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (getOperand(i)->getType() != I->getOperand(i)->getType()) + if (UseScalarTypes ? + getOperand(i)->getType()->getScalarType() != + I->getOperand(i)->getType()->getScalarType() : + getOperand(i)->getType() != I->getOperand(i)->getType()) return false; // Check special state that is a part of some instructions. if (const LoadInst *LI = dyn_cast<LoadInst>(this)) return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() && - LI->getAlignment() == cast<LoadInst>(I)->getAlignment() && + (LI->getAlignment() == cast<LoadInst>(I)->getAlignment() || + IgnoreAlignment) && LI->getOrdering() == cast<LoadInst>(I)->getOrdering() && LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope(); if (const StoreInst *SI = dyn_cast<StoreInst>(this)) return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() && - SI->getAlignment() == cast<StoreInst>(I)->getAlignment() && + (SI->getAlignment() == cast<StoreInst>(I)->getAlignment() || + IgnoreAlignment) && SI->getOrdering() == cast<StoreInst>(I)->getOrdering() && SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope(); if (const CmpInst *CI = dyn_cast<CmpInst>(this)) diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 26cc6322da..383994554d 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -3158,6 +3158,7 @@ 
SwitchInst::SwitchInst(const SwitchInst &SI) OL[i] = InOL[i]; OL[i+1] = InOL[i+1]; } + TheSubsets = SI.TheSubsets; SubclassOptionalData = SI.SubclassOptionalData; } @@ -3186,14 +3187,17 @@ void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) { // Initialize some new operands. assert(OpNo+1 < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+2; - CaseIt Case(this, NewCaseIdx); - Case.setValueEx(OnVal); + + SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal); + + CaseIt Case(this, NewCaseIdx, TheSubsetsIt); + Case.updateCaseValueOperand(OnVal); Case.setSuccessor(Dest); } /// removeCase - This method removes the specified case and its successor /// from the switch instruction. -void SwitchInst::removeCase(CaseIt i) { +void SwitchInst::removeCase(CaseIt& i) { unsigned idx = i.getCaseIndex(); assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!"); @@ -3210,6 +3214,16 @@ void SwitchInst::removeCase(CaseIt i) { // Nuke the last value. OL[NumOps-2].set(0); OL[NumOps-2+1].set(0); + + // Do the same with TheCases collection: + if (i.SubsetIt != --TheSubsets.end()) { + *i.SubsetIt = TheSubsets.back(); + TheSubsets.pop_back(); + } else { + TheSubsets.pop_back(); + i.SubsetIt = TheSubsets.end(); + } + NumOperands = NumOps-2; } diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index f018f44d0b..ede4626b92 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "SymbolTableListTraitsImpl.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ValueHandle.h" using namespace llvm; @@ -401,6 +402,155 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { } } +MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + SmallVector<MDNode *, 4> PathA; + MDNode *T = A; + while (T) { + 
PathA.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + SmallVector<MDNode *, 4> PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; + } + + int IA = PathA.size() - 1; + int IB = PathB.size() - 1; + + MDNode *Ret = 0; + while (IA >= 0 && IB >=0) { + if (PathA[IA] == PathB[IB]) + Ret = PathA[IA]; + else + break; + --IA; + --IB; + } + return Ret; +} + +MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF(); + APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF(); + if (AVal.compare(BVal) == APFloat::cmpLessThan) + return A; + return B; +} + +static bool isContiguous(const ConstantRange &A, const ConstantRange &B) { + return A.getUpper() == B.getLower() || A.getLower() == B.getUpper(); +} + +static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) { + return !A.intersectWith(B).isEmptySet() || isContiguous(A, B); +} + +static bool tryMergeRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low, + ConstantInt *High) { + ConstantRange NewRange(Low->getValue(), High->getValue()); + unsigned Size = EndPoints.size(); + APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue(); + APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue(); + ConstantRange LastRange(LB, LE); + if (canBeMerged(NewRange, LastRange)) { + ConstantRange Union = LastRange.unionWith(NewRange); + Type *Ty = High->getType(); + EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower()); + EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper()); + return true; + } + return false; +} + +static void addRange(SmallVector<Value*, 4> &EndPoints, ConstantInt *Low, + ConstantInt *High) { + if (!EndPoints.empty()) + if (tryMergeRange(EndPoints, Low, High)) + return; + + EndPoints.push_back(Low); + EndPoints.push_back(High); +} + +MDNode 
*MDNode::getMostGenericRange(MDNode *A, MDNode *B) { + // Given two ranges, we want to compute the union of the ranges. This + // is slightly complitade by having to combine the intervals and merge + // the ones that overlap. + + if (!A || !B) + return NULL; + + if (A == B) + return A; + + // First, walk both lists in older of the lower boundary of each interval. + // At each step, try to merge the new interval to the last one we adedd. + SmallVector<Value*, 4> EndPoints; + int AI = 0; + int BI = 0; + int AN = A->getNumOperands() / 2; + int BN = B->getNumOperands() / 2; + while (AI < AN && BI < BN) { + ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI)); + ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI)); + + if (ALow->getValue().slt(BLow->getValue())) { + addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1))); + ++AI; + } else { + addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1))); + ++BI; + } + } + while (AI < AN) { + addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)), + cast<ConstantInt>(A->getOperand(2 * AI + 1))); + ++AI; + } + while (BI < BN) { + addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)), + cast<ConstantInt>(B->getOperand(2 * BI + 1))); + ++BI; + } + + // If we have more than 2 ranges (4 endpoints) we have to try to merge + // the last and first ones. + unsigned Size = EndPoints.size(); + if (Size > 4) { + ConstantInt *FB = cast<ConstantInt>(EndPoints[0]); + ConstantInt *FE = cast<ConstantInt>(EndPoints[1]); + if (tryMergeRange(EndPoints, FB, FE)) { + for (unsigned i = 0; i < Size - 2; ++i) { + EndPoints[i] = EndPoints[i + 2]; + } + EndPoints.resize(Size - 2); + } + } + + // If in the end we have a single range, it is possible that it is now the + // full range. Just drop the metadata in that case. 
+ if (EndPoints.size() == 2) { + ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(), + cast<ConstantInt>(EndPoints[1])->getValue()); + if (Range.isFullSet()) + return NULL; + } + + return MDNode::get(A->getContext(), EndPoints); +} + //===----------------------------------------------------------------------===// // NamedMDNode implementation. // diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 477b81dc67..5d51f4164a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1636,8 +1636,11 @@ void Verifier::visitInstruction(Instruction &I) { if (Function *F = dyn_cast<Function>(I.getOperand(i))) { // Check to make sure that the "address of" an intrinsic function is never // taken. - Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)), + Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0), "Cannot take the address of an intrinsic!", &I); + Assert1(!F->isIntrinsic() || isa<CallInst>(I) || + F->getIntrinsicID() == Intrinsic::donothing, + "Cannot invoke an intrinsinc other than donothing", &I); Assert1(F->getParent() == Mod, "Referencing function in another module!", &I); } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) { diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt index 415530e332..dac637335b 100644 --- a/projects/CMakeLists.txt +++ b/projects/CMakeLists.txt @@ -4,7 +4,8 @@ file(GLOB entries *) foreach(entry ${entries}) if(IS_DIRECTORY ${entry} AND EXISTS ${entry}/CMakeLists.txt) - if(NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt) + if((NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt) AND + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxx)) add_subdirectory(${entry}) endif() endif() diff --git a/projects/sample/Makefile.llvm.config.in b/projects/sample/Makefile.llvm.config.in index 697660c0ca..9a85b3df63 100644 --- a/projects/sample/Makefile.llvm.config.in +++ b/projects/sample/Makefile.llvm.config.in @@ -157,8 +157,6 @@ GAS 
:= @GAS@ POD2HTML := @POD2HTML@ POD2MAN := @POD2MAN@ PDFROFF := @PDFROFF@ -RUNTEST := @RUNTEST@ -TCLSH := @TCLSH@ ZIP := @ZIP@ HAVE_PTHREAD := @HAVE_PTHREAD@ diff --git a/projects/sample/Makefile.llvm.rules b/projects/sample/Makefile.llvm.rules index c980d6b667..a6553020f8 100644 --- a/projects/sample/Makefile.llvm.rules +++ b/projects/sample/Makefile.llvm.rules @@ -1866,20 +1866,9 @@ check:: $(EchoCmd) No test directory ; \ fi +# An alias dating from when both lit and DejaGNU test runners were used. check-lit:: check -check-dg:: - $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \ - if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \ - $(EchoCmd) Running test suite ; \ - $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-dg ; \ - else \ - $(EchoCmd) No Makefile in test directory ; \ - fi ; \ - else \ - $(EchoCmd) No test directory ; \ - fi - check-all:: $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \ diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac index 092bc6843f..bd0b16a4a6 100644 --- a/projects/sample/autoconf/configure.ac +++ b/projects/sample/autoconf/configure.ac @@ -305,6 +305,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; mips-*) llvm_cv_target_arch="Mips" ;; + mipsel-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; diff --git a/projects/sample/configure b/projects/sample/configure index cb1bb0b2bb..0e341fbb34 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -3846,6 +3846,7 @@ else powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; mips-*) llvm_cv_target_arch="Mips" ;; + mipsel-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) 
llvm_cv_target_arch="Hexagon" ;; @@ -10312,7 +10313,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10310 "configure" +#line 10311 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c index b01b948142..f2dc4f7988 100644 --- a/runtime/libprofile/GCDAProfiling.c +++ b/runtime/libprofile/GCDAProfiling.c @@ -117,6 +117,7 @@ void llvm_gcda_start_file(const char *orig_filename) { fprintf(stderr, "LLVM profiling runtime: cannot open '%s': ", cptr ? cptr + 1 : orig_filename); perror(""); + free(filename); return; } } diff --git a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll index 4b787bf9d0..768411e9d3 100644 --- a/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll +++ b/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll @@ -1,7 +1,7 @@ ; This testcase consists of alias relations which should be completely ; resolvable by basicaa. -; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s %T = type { i32, [10 x i8] } diff --git a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll index 68039fbc0a..b7bbf7732f 100644 --- a/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll +++ b/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll @@ -1,7 +1,7 @@ ; This testcase consists of alias relations which should be completely ; resolvable by basicaa, but require analysis of getelementptr constant exprs. 
-; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s %T = type { i32, [10 x i8] } diff --git a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll index c98241e846..06a804c392 100644 --- a/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll +++ b/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -disable-output 2>&1 | FileCheck %s ; TEST that A[1][0] may alias A[0][i]. target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll index b46ee1925e..2a6f5b9955 100644 --- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll +++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; CHECK: Function: foo ; CHECK: MayAlias: i32* %x, i32* %y diff --git a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll index 93b277edf6..4be793ec41 100644 --- a/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll +++ b/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; CHECK: Function: foo ; CHECK: MayAlias: i32* %Ipointer, i32* %Jpointer diff --git a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll 
b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll index e8cf380d6f..3db9a3fbcd 100644 --- a/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll +++ b/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -disable-output 2>&1 | FileCheck %s declare noalias i32* @_Znwj(i32 %x) nounwind diff --git a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll index a9750d25c6..c546d68f42 100644 --- a/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll +++ b/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; If GEP base doesn't alias Z, then GEP doesn't alias Z. ; rdar://7282591 diff --git a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll index e647231de1..66569808fb 100644 --- a/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll +++ b/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; PR7959 target datalayout = "e-p:32:32:32" diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll index c7b43ec684..066f46b16c 100644 --- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll +++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s |& FileCheck %s +; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s declare void @callee(double* 
%callee_arg) declare void @nocap_callee(double* nocapture %nocap_callee_arg) diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll index 48ef2595f2..232533cc73 100644 --- a/test/Analysis/BasicAA/constant-over-index.ll +++ b/test/Analysis/BasicAA/constant-over-index.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s ; PR4267 ; CHECK: MayAlias: double* %p.0.i.0, double* %p3 diff --git a/test/Analysis/BasicAA/dag.ll b/test/Analysis/BasicAA/dag.ll index 501f4c399b..1d2f6f1a76 100644 --- a/test/Analysis/BasicAA/dag.ll +++ b/test/Analysis/BasicAA/dag.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/Analysis/BasicAA/empty.ll b/test/Analysis/BasicAA/empty.ll index 25927ebbe1..dfc79f9c04 100644 --- a/test/Analysis/BasicAA/empty.ll +++ b/test/Analysis/BasicAA/empty.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll index 4bb2832661..9c2c7eeec3 100644 --- a/test/Analysis/BasicAA/gep-alias.ll +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -instcombine -S |& FileCheck %s +; RUN: opt < %s -basicaa -gvn -instcombine -S 2>&1 | FileCheck %s target 
datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll index ebd349a6aa..f0f1a631d0 100644 --- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll +++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; CHECK: Just Ref: call void @ro() <-> call void @f0() diff --git a/test/Analysis/BasicAA/must-and-partial.ll b/test/Analysis/BasicAA/must-and-partial.ll index 93b6184f4f..58139ff30e 100644 --- a/test/Analysis/BasicAA/must-and-partial.ll +++ b/test/Analysis/BasicAA/must-and-partial.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s ; When merging MustAlias and PartialAlias, merge to PartialAlias ; instead of MayAlias. 
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll index a1b8d2ce53..6aa26c185e 100644 --- a/test/Analysis/BasicAA/phi-aa.ll +++ b/test/Analysis/BasicAA/phi-aa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; rdar://7282591 @X = common global i32 0 diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll index f752ab3c3e..b8fee00ed0 100644 --- a/test/Analysis/BasicAA/phi-and-select.ll +++ b/test/Analysis/BasicAA/phi-and-select.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; BasicAA should detect NoAliases in PHIs and Selects. diff --git a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll index 55d0be6f26..595cc427c4 100644 --- a/test/Analysis/CallGraph/2008-09-09-DirectCall.ll +++ b/test/Analysis/CallGraph/2008-09-09-DirectCall.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-callgraph -disable-output |& FileCheck %s +; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s ; CHECK: Call graph node <<null function>> ; CHECK: CS<{{.*}}> calls function 'callee' diff --git a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll index 632cd0c790..ac95188899 100644 --- a/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll +++ b/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-callgraph -disable-output |& FileCheck %s +; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s @a = global void ()* @f ; <void ()**> [#uses=0] diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll 
index 272a5593de..450dce58e3 100644 --- a/test/Analysis/CallGraph/no-intrinsics.ll +++ b/test/Analysis/CallGraph/no-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-callgraph -disable-output |& FileCheck %s +; RUN: opt < %s -print-callgraph -disable-output 2>&1 | FileCheck %s ; Check that intrinsics aren't added to the call graph diff --git a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll index 9355aeea54..7119007ffd 100644 --- a/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll +++ b/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll @@ -2,7 +2,7 @@ ; not a child of the loopentry.6 loop. ; ; RUN: opt < %s -analyze -loops | \ -; RUN: grep {^ Loop at depth 4 containing: %loopentry.7<header><latch><exiting>} +; RUN: grep "^ Loop at depth 4 containing: %loopentry.7<header><latch><exiting>" define void @getAndMoveToFrontDecode() { br label %endif.2 diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll index faec45a911..ac77ab36e6 100644 --- a/test/Analysis/RegionInfo/block_sort.ll +++ b/test/Analysis/RegionInfo/block_sort.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats -analyze < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @BZ2_blockSort() nounwind { start: diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll index 2ce57c3c5f..1145ffdba0 100644 --- a/test/Analysis/RegionInfo/cond_loop.ll +++ 
b/test/Analysis/RegionInfo/cond_loop.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 5: diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll index 7ca5c7c7b5..6b398800db 100644 --- a/test/Analysis/RegionInfo/condition_complicated.ll +++ b/test/Analysis/RegionInfo/condition_complicated.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @handle_compress() nounwind { end165: diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll index 5fa940a61e..f551108d60 100644 --- a/test/Analysis/RegionInfo/condition_complicated_2.ll +++ b/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; 
RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc void @compress() nounwind { end33: diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll index 098c9b6b46..5e4d9d2f8b 100644 --- a/test/Analysis/RegionInfo/condition_forward_edge.ll +++ b/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll index 1b88596c0f..e48413a4c2 100644 --- a/test/Analysis/RegionInfo/condition_same_exit.ll +++ b/test/Analysis/RegionInfo/condition_same_exit.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions 
-stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll index 19b154b647..00d9ed24e1 100644 --- a/test/Analysis/RegionInfo/condition_simple.ll +++ b/test/Analysis/RegionInfo/condition_simple.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll index 3b152d2f56..b84abecc16 100644 --- a/test/Analysis/RegionInfo/exit_in_condition.ll +++ b/test/Analysis/RegionInfo/exit_in_condition.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s 
define internal fastcc zeroext i8 @handle_compress() nounwind { entry: diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll index 59cead4926..8e588286a5 100644 --- a/test/Analysis/RegionInfo/infinite_loop.ll +++ b/test/Analysis/RegionInfo/infinite_loop.ll @@ -1,5 +1,5 @@ ; RUN: opt -regions -analyze < %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll index 80c69b7ab2..a8227e340c 100644 --- a/test/Analysis/RegionInfo/infinite_loop_2.ll +++ b/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll index 74ceafb849..b09c9c1e59 100644 --- a/test/Analysis/RegionInfo/infinite_loop_3.ll +++ b/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -1,8 +1,8 @@ ; RUN: opt -regions -analyze < %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck 
-check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll index fd56af1d3b..681c305ce9 100644 --- a/test/Analysis/RegionInfo/infinite_loop_4.ll +++ b/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 0: diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll index d1d68982ee..08d2ba8e35 100644 --- a/test/Analysis/RegionInfo/loop_with_condition.ll +++ b/test/Analysis/RegionInfo/loop_with_condition.ll @@ -1,8 +1,8 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition() nounwind { 
0: diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll index d4bf3cc501..6449949df8 100644 --- a/test/Analysis/RegionInfo/loops_1.ll +++ b/test/Analysis/RegionInfo/loops_1.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @loops_1() nounwind { entry: diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll index 07aa7c3110..dc4a1adffb 100644 --- a/test/Analysis/RegionInfo/loops_2.ll +++ b/test/Analysis/RegionInfo/loops_2.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @meread_() nounwind { entry: diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll index 829c157c2c..1474e033e5 100644 --- a/test/Analysis/RegionInfo/mix_1.ll +++ b/test/Analysis/RegionInfo/mix_1.ll @@ -1,8 +1,8 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& 
FileCheck -check-prefix=STAT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @a_linear_impl_fig_1() nounwind { 0: diff --git a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll index 7bc0e4607d..8de6472299 100644 --- a/test/Analysis/RegionInfo/multiple_exiting_edge.ll +++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll @@ -1,5 +1,5 @@ -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @normal_condition_0() nounwind { bb38: ; preds = %bb34, %bb34, %bb37 diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll index 9d8c4558f0..a3707a1987 100644 --- a/test/Analysis/RegionInfo/nested_loops.ll +++ b/test/Analysis/RegionInfo/nested_loops.ll @@ -1,8 +1,8 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt 
-regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @handle_compress() nounwind { entry: diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll index 377a84d389..890b4f2300 100644 --- a/test/Analysis/RegionInfo/next.ll +++ b/test/Analysis/RegionInfo/next.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @MAIN__() nounwind { entry: diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll index 00b544bc69..96c87e0559 100644 --- a/test/Analysis/RegionInfo/paper.ll +++ b/test/Analysis/RegionInfo/paper.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define void @a_linear_impl_fig_1() nounwind { 0: diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll index a97182b81a..e75661e890 100644 --- 
a/test/Analysis/RegionInfo/two_loops_same_header.ll +++ b/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -1,7 +1,7 @@ ; RUN: opt -regions -analyze < %s | FileCheck %s -; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s -; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s -; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s +; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s 2>&1 | FileCheck -check-prefix=RNIT %s define internal fastcc zeroext i8 @handle_compress() nounwind { entry: diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll index 7ff130f201..e0c5583cbb 100644 --- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep {Loop %bb: backedge-taken count is 100} +; RUN: -scalar-evolution-max-iterations=0 | grep "Loop %bb: backedge-taken count is 100" ; PR1533 @array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll index ab96243ef1..036abf5b7c 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} +; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb: backedge-taken count is (-1 + (-1 \* %x) + %y)" ; PR1597 define i32 @f(i32 %x, i32 %y) { diff --git 
a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll index b678fee22c..a3192b9c01 100644 --- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll +++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13} +; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 13" ; PR1706 define i32 @f() { diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll index fe3a7f4191..d0644f7b3f 100644 --- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll +++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %header: backedge-taken count is (0 smax %n)} +; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %header: backedge-taken count is (0 smax %n)" define void @foo(i32 %n) { entry: diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll index bcc124d1ec..41734d70f0 100644 --- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll +++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61} +; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 61" ; PR2364 define i32 @func_6() nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll index 9db9b71c7c..5cf17a2101 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll @@ -1,4 +1,4 @@ -; RUN: 
opt < %s -analyze -scalar-evolution |& not grep smax +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax ; PR2261 @lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll index 1847665552..195dfaaaee 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution |& not grep smax +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | not grep smax ; PR2070 define i32 @a(i32 %x) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll index 86e07ec41b..cbf200e40f 100644 --- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll +++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113} +; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 113" ; PR2088 define void @fun() { diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index 335bbaf9ad..c25e4a3b2b 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution |& \ -; RUN: grep {Loop %bb: backedge-taken count is (7 + (-1 \\* %argc))} +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \ +; RUN: grep "Loop %bb: backedge-taken count is (7 + (-1 \* %argc))" define i32 @main(i32 %argc, i8** %argv) nounwind { entry: diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll index db527fefa9..56a8343883 
100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {Loop %bb: Unpredictable backedge-taken count\\.} +; RUN: | grep "Loop %bb: Unpredictable backedge-taken count\." ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. This could theoretically diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll index fa9f21af37..aaf6770676 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 3} +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 3" ; XFAIL: * ; This is a tricky testcase for unsigned wrap detection which ScalarEvolution diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll index 25a0434b29..a1b3b71916 100644 --- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll +++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep {backedge-taken count is 255} +; RUN: opt < %s -analyze -scalar-evolution | grep "backedge-taken count is 255" define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind { bb1.thread: diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll index 8152e988ff..bb149193a0 100644 --- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll +++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -analyze -scalar-evolution |& \ -; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)} +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | \ +; RUN: grep "(((-1 * %i0) + 
(100005 smax %i0)) /u 5)" ; XFAIL: * define i32 @foo0(i32 %i0) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll index 3eaa49212e..70006260cb 100644 --- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll +++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 5} +; RUN: opt < %s -analyze -scalar-evolution 2>&1 | grep "/u 5" ; XFAIL: * define i8 @foo0(i8 %i0) nounwind { diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll index cc2a2e42bc..82f2608e57 100644 --- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution | not grep {/u -1} +; RUN: opt < %s -analyze -scalar-evolution | not grep "/u -1" ; PR3275 @g_16 = external global i16 ; <i16*> [#uses=3] diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll index c2e108aa9c..ebd9f7377d 100644 --- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll +++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep {(trunc i} | not grep ext +; RUN: opt < %s -analyze -scalar-evolution | grep "(trunc i" | not grep ext define i16 @test1(i8 %x) { %A = sext i8 %x to i32 diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll index dc7bd29c57..8a78043134 100644 --- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll +++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -scalar-evolution | grep {count is 2} +; RUN: opt < %s -analyze -scalar-evolution | 
grep "count is 2" ; PR3171 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll index 17725735a7..06f4a8582f 100644 --- a/test/Analysis/ScalarEvolution/and-xor.ll +++ b/test/Analysis/ScalarEvolution/and-xor.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep {\\--> (zext} | count 2 +; RUN: | grep "\--> (zext" | count 2 define i32 @foo(i32 %x) { %n = and i32 %x, 255 diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll index 24275f9fc4..3d15c787fc 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb3: backedge-taken count is (-1 + %n)} +; RUN: opt < %s -scalar-evolution -analyze | grep "Loop %bb3: backedge-taken count is (-1 + %n)" ; We don't want to use a max in the trip count expression in ; this testcase. diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll index 4f6f1e2a30..2846797560 100644 --- a/test/Analysis/ScalarEvolution/div-overflow.ll +++ b/test/Analysis/ScalarEvolution/div-overflow.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep {\\--> ((-128 \\* %a) /u -128)} +; RUN: | grep "\--> ((-128 \* %a) /u -128)" ; Don't let ScalarEvolution fold this div away. 
diff --git a/test/Analysis/ScalarEvolution/how-far-to-zero.ll b/test/Analysis/ScalarEvolution/how-far-to-zero.ll new file mode 100644 index 0000000000..07af88ffbe --- /dev/null +++ b/test/Analysis/ScalarEvolution/how-far-to-zero.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; PR13228 +define void @f() nounwind uwtable readnone { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %c.0 = phi i8 [ 1, %entry ], [ 0, %for.cond ] + %i.0 = phi i8 [ 0, %entry ], [ %inc, %for.cond ] + %lnot = icmp eq i8 %i.0, 0 + %inc = add i8 %i.0, 1 + br i1 %lnot, label %for.cond, label %while.cond + +while.cond: ; preds = %while.body, %for.cond + %b.2 = phi i8 [ %add, %while.body ], [ 0, %for.cond ] + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %add = add i8 %b.2, %c.0 + %tobool7 = icmp eq i8 %add, 0 + br i1 %tobool7, label %while.end, label %while.cond + +while.end: ; preds = %while.body, %while.cond + ret void +} +;CHECK: Loop %while.cond: <multiple exits> Unpredictable backedge-taken count. 
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll index dd5a66ccb4..a0abbb787b 100644 --- a/test/Analysis/ScalarEvolution/scev-aa.ll +++ b/test/Analysis/ScalarEvolution/scev-aa.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \ -; RUN: |& FileCheck %s +; RUN: 2>&1 | FileCheck %s ; At the time of this writing, -basicaa misses the example of the form ; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references, diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll index 23e1210dba..8b3d641943 100644 --- a/test/Analysis/ScalarEvolution/sext-inreg.ll +++ b/test/Analysis/ScalarEvolution/sext-inreg.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -analyze -scalar-evolution > %t -; RUN: grep {sext i57 \{0,+,199\}<%bb> to i64} %t | count 1 -; RUN: grep {sext i59 \{0,+,199\}<%bb> to i64} %t | count 1 +; RUN: grep "sext i57 {0,+,199}<%bb> to i64" %t | count 1 +; RUN: grep "sext i59 {0,+,199}<%bb> to i64" %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll index 9063cbb22a..c34596d35a 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-1.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<%bb1> to i64)} | count 5 +; RUN: | grep " --> (sext i. {.*,+,.*}<%bb1> to i64)" | count 5 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases ; where the trip count is not within range. 
diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll index 15dd744c8f..eceb4298fd 100644 --- a/test/Analysis/ScalarEvolution/smax.ll +++ b/test/Analysis/ScalarEvolution/smax.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -analyze -scalar-evolution | grep smax | count 2 ; RUN: opt < %s -analyze -scalar-evolution | grep \ -; RUN: {%. smax %. smax %.} +; RUN: "%. smax %. smax %." ; PR1614 define i32 @x(i32 %a, i32 %b, i32 %c) { diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll index cb4e267dd2..94f6882c0c 100644 --- a/test/Analysis/ScalarEvolution/trip-count.ll +++ b/test/Analysis/ScalarEvolution/trip-count.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000} +; RUN: -scalar-evolution-max-iterations=0 | grep "backedge-taken count is 10000" ; PR1101 @A = weak global [1000 x i32] zeroinitializer, align 32 diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll index e26cbea732..d84e99f6e7 100644 --- a/test/Analysis/ScalarEvolution/trip-count2.ll +++ b/test/Analysis/ScalarEvolution/trip-count2.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution | \ -; RUN: grep {backedge-taken count is 4} +; RUN: grep "backedge-taken count is 4" ; PR1101 @A = weak global [1000 x i32] zeroinitializer, align 32 diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index 1bf86ae9be..0cb6c952b8 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep {Loop %bb3\\.i: Unpredictable backedge-taken count\\.} +; RUN: | grep "Loop %bb3\.i: Unpredictable backedge-taken count\." ; ScalarEvolution can't compute a trip count because it doesn't know if ; dividing by the stride will have a remainder. 
This could theoretically diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll index 116f62dbdb..c02ae14526 100644 --- a/test/Analysis/ScalarEvolution/trip-count4.ll +++ b/test/Analysis/ScalarEvolution/trip-count4.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {sext.*trunc.*Exits: 11} +; RUN: | grep "sext.*trunc.*Exits: 11" ; ScalarEvolution should be able to compute a loop exit value for %indvar.i8. diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll index 1194a1da66..68a1ae14a7 100644 --- a/test/Analysis/ScalarEvolution/trip-count5.ll +++ b/test/Analysis/ScalarEvolution/trip-count5.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -analyze -scalar-evolution > %t ; RUN: grep sext %t | count 2 -; RUN: not grep {(sext} %t +; RUN: not grep "(sext" %t ; ScalarEvolution should be able to compute a maximum trip count ; value sufficient to fold away both sext casts. diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll index 956fb81b0e..882f5526da 100644 --- a/test/Analysis/ScalarEvolution/trip-count6.ll +++ b/test/Analysis/ScalarEvolution/trip-count6.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {max backedge-taken count is 1\$} +; RUN: | grep "max backedge-taken count is 1$" @mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll index a8b797e142..2bcb9e92ab 100644 --- a/test/Analysis/ScalarEvolution/trip-count7.ll +++ b/test/Analysis/ScalarEvolution/trip-count7.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {Loop %bb7.i: Unpredictable backedge-taken count\\.} +; RUN: | grep "Loop %bb7.i: Unpredictable backedge-taken count\." 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll index ac5ee607ec..005162b792 100644 --- a/test/Analysis/ScalarEvolution/trip-count8.ll +++ b/test/Analysis/ScalarEvolution/trip-count8.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -analyze -scalar-evolution \ -; RUN: | grep {Loop %for\\.body: backedge-taken count is (-1 + \[%\]ecx)} +; RUN: | grep "Loop %for\.body: backedge-taken count is (-1 + [%]ecx)" ; PR4599 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll index c0530bbc3c..4ab2f39a28 100644 --- a/test/Analysis/ScalarEvolution/xor-and.ll +++ b/test/Analysis/ScalarEvolution/xor-and.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep {\\--> (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)} +; RUN: | grep "\--> (zext i4 (-8 + (trunc i64 (8 \* %x) to i4)) to i64)" ; ScalarEvolution shouldn't try to analyze %z into something like ; --> (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64) diff --git a/test/Assembler/2003-04-15-ConstantInitAssertion.ll b/test/Assembler/2003-04-15-ConstantInitAssertion.ll index fa6b807709..dddbdb1c47 100644 --- a/test/Assembler/2003-04-15-ConstantInitAssertion.ll +++ b/test/Assembler/2003-04-15-ConstantInitAssertion.ll @@ -1,4 +1,5 @@ -; RUN: not llvm-as < %s >/dev/null |& grep {struct initializer doesn't match struct element type} +; RUN: not llvm-as < %s >/dev/null 2> %t +; RUN: grep "struct initializer doesn't match struct element type" %t ; Test the case of a misformed constant initializer ; This should cause an assembler error, not an assertion failure! 
constant { i32 } { float 1.0 } diff --git a/test/Assembler/2003-05-21-MalformedShiftCrash.ll b/test/Assembler/2003-05-21-MalformedShiftCrash.ll index a845d89bb6..1d4ac401d6 100644 --- a/test/Assembler/2003-05-21-MalformedShiftCrash.ll +++ b/test/Assembler/2003-05-21-MalformedShiftCrash.ll @@ -1,4 +1,5 @@ ; Found by inspection of the code -; RUN: not llvm-as < %s > /dev/null |& grep {constexpr requires integer operands} +; RUN: not llvm-as < %s > /dev/null 2> %t +; RUN: grep "constexpr requires integer operands" %t global i32 ashr (float 1.0, float 2.0) diff --git a/test/Assembler/2003-05-21-MalformedStructCrash.ll b/test/Assembler/2003-05-21-MalformedStructCrash.ll index 8d20e0703a..44d3e234d7 100644 --- a/test/Assembler/2003-05-21-MalformedStructCrash.ll +++ b/test/Assembler/2003-05-21-MalformedStructCrash.ll @@ -1,4 +1,5 @@ ; Found by inspection of the code -; RUN: not llvm-as < %s > /dev/null |& grep {initializer with struct type has wrong # elements} +; RUN: not llvm-as < %s > /dev/null 2> %t +; RUN: grep "initializer with struct type has wrong # elements" %t global {} { i32 7, float 1.0, i32 7, i32 8 } diff --git a/test/Assembler/2003-11-12-ConstantExprCast.ll b/test/Assembler/2003-11-12-ConstantExprCast.ll index 149fef2276..47a53537f8 100644 --- a/test/Assembler/2003-11-12-ConstantExprCast.ll +++ b/test/Assembler/2003-11-12-ConstantExprCast.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | not grep { bitcast (} +; RUN: llvm-as < %s | llvm-dis | not grep " bitcast (" @.Base64_1 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1] diff --git a/test/Assembler/2003-11-24-SymbolTableCrash.ll b/test/Assembler/2003-11-24-SymbolTableCrash.ll index 041b0d94c4..28fd30178d 100644 --- a/test/Assembler/2003-11-24-SymbolTableCrash.ll +++ b/test/Assembler/2003-11-24-SymbolTableCrash.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {multiple definition} +; RUN: not llvm-as < %s 2>&1 | grep "multiple definition" define void @test() { %tmp.1 = add i32 0, 1 diff --git 
a/test/Assembler/2004-01-11-getelementptrfolding.ll b/test/Assembler/2004-01-11-getelementptrfolding.ll index c22aede5df..5249d0e5bc 100644 --- a/test/Assembler/2004-01-11-getelementptrfolding.ll +++ b/test/Assembler/2004-01-11-getelementptrfolding.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as < %s | llvm-dis | \ -; RUN: not grep {getelementptr.*getelementptr} +; RUN: not grep "getelementptr.*getelementptr" %struct.TTriangleItem = type { i8*, i8*, [3 x %struct.TUVVertex] } %struct.TUVVertex = type { i16, i16, i16, i16 } diff --git a/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll b/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll index 775b7558f3..9f24f1afd5 100644 --- a/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll +++ b/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll @@ -1,3 +1,3 @@ -; RUN: not llvm-as %s |& grep {found end of file when expecting more instructions} +; RUN: not llvm-as %s 2>&1 | grep "found end of file when expecting more instructions" define void @foo() { diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll index 40648fdbde..4db5b7453b 100644 --- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll +++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll @@ -1,4 +1,4 @@ ; Test for PR463. This program is erroneous, but should not crash llvm-as. -; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.none'} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'struct.none'" @.FOO = internal global %struct.none zeroinitializer diff --git a/test/Assembler/2006-09-28-CrashOnInvalid.ll b/test/Assembler/2006-09-28-CrashOnInvalid.ll index a203c6ad03..6041bdf479 100644 --- a/test/Assembler/2006-09-28-CrashOnInvalid.ll +++ b/test/Assembler/2006-09-28-CrashOnInvalid.ll @@ -1,6 +1,7 @@ ; Test for PR902. This program is erroneous, but should not crash llvm-as. ; This tests that a simple error is caught and processed correctly. 
-; RUN: not llvm-as < %s >/dev/null |& grep {floating point constant invalid for type} +; RUN: not llvm-as < %s >/dev/null 2> %t +; RUN: grep "floating point constant invalid for type" %t define void @test() { add i32 1, 2.0 diff --git a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll index a39de1cb6c..184e543123 100644 --- a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll +++ b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll @@ -1,5 +1,5 @@ ; The assembler should catch an undefined argument type . -; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'typedef.bc_struct'} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'typedef.bc_struct'" ; %typedef.bc_struct = type opaque diff --git a/test/Assembler/2007-01-16-CrashOnBadCast.ll b/test/Assembler/2007-01-16-CrashOnBadCast.ll index 81f5458b2e..aa741443d8 100644 --- a/test/Assembler/2007-01-16-CrashOnBadCast.ll +++ b/test/Assembler/2007-01-16-CrashOnBadCast.ll @@ -1,5 +1,5 @@ ; PR1117 -; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "invalid cast opcode for cast from" define i8* @nada(i64 %X) { %result = trunc i64 %X to i8* diff --git a/test/Assembler/2007-01-16-CrashOnBadCast2.ll b/test/Assembler/2007-01-16-CrashOnBadCast2.ll index c05c60952c..479bef7d7a 100644 --- a/test/Assembler/2007-01-16-CrashOnBadCast2.ll +++ b/test/Assembler/2007-01-16-CrashOnBadCast2.ll @@ -1,4 +1,4 @@ ; PR1117 -; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "invalid cast opcode for cast from" @X = constant i8* trunc (i64 0 to i8*) diff --git a/test/Assembler/2007-03-18-InvalidNumberedVar.ll b/test/Assembler/2007-03-18-InvalidNumberedVar.ll index b2193b1701..0f6b24d5d9 100644 --- a/test/Assembler/2007-03-18-InvalidNumberedVar.ll +++ b/test/Assembler/2007-03-18-InvalidNumberedVar.ll @@ -1,5 
+1,6 @@ ; PR 1258 -; RUN: not llvm-as < %s >/dev/null |& grep {'%0' defined with type 'i1'} +; RUN: not llvm-as < %s >/dev/null 2> %t +; RUN: grep "'%0' defined with type 'i1'" %t define i32 @test1(i32 %a, i32 %b) { entry: diff --git a/test/Assembler/2007-03-19-NegValue.ll b/test/Assembler/2007-03-19-NegValue.ll index e90cf351e1..64eb3cb590 100644 --- a/test/Assembler/2007-03-19-NegValue.ll +++ b/test/Assembler/2007-03-19-NegValue.ll @@ -1,5 +1,5 @@ ; Test whether negative values > 64 bits retain their negativeness. -; RUN: llvm-as < %s | llvm-dis | grep {add i65.*, -1} +; RUN: llvm-as < %s | llvm-dis | grep "add i65.*, -1" define i65 @testConsts(i65 %N) { %a = add i65 %N, -1 diff --git a/test/Assembler/2007-04-20-AlignedLoad.ll b/test/Assembler/2007-04-20-AlignedLoad.ll index f0217aec2c..98a5428a97 100644 --- a/test/Assembler/2007-04-20-AlignedLoad.ll +++ b/test/Assembler/2007-04-20-AlignedLoad.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | grep {align 1024} +; RUN: llvm-as < %s | llvm-dis | grep "align 1024" define i32 @test(i32* %arg) { entry: diff --git a/test/Assembler/2007-04-20-AlignedStore.ll b/test/Assembler/2007-04-20-AlignedStore.ll index 1b08c48444..9e4dd9fd07 100644 --- a/test/Assembler/2007-04-20-AlignedStore.ll +++ b/test/Assembler/2007-04-20-AlignedStore.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | grep {align 1024} +; RUN: llvm-as < %s | llvm-dis | grep "align 1024" define void @test(i32* %arg) { entry: diff --git a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll index c26d9ebc26..b0ca1aad86 100644 --- a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll +++ b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | grep {icmp.*test_weak.*null} +; RUN: llvm-as < %s | llvm-dis | grep "icmp.*test_weak.*null" ; PR1358 @G = global i1 icmp ne (i32 (...)* @test_weak, i32 (...)* null) diff --git 
a/test/Assembler/2007-08-06-AliasInvalid.ll b/test/Assembler/2007-08-06-AliasInvalid.ll index 9409598246..3abdc41cd3 100644 --- a/test/Assembler/2007-08-06-AliasInvalid.ll +++ b/test/Assembler/2007-08-06-AliasInvalid.ll @@ -1,4 +1,5 @@ -; RUN: not llvm-as < %s > /dev/null |& grep {expected top-level entity} +; RUN: not llvm-as < %s > /dev/null 2> %t +; RUN: grep "expected top-level entity" %t ; PR1577 @anInt = global i32 1 diff --git a/test/Assembler/2007-09-29-GC.ll b/test/Assembler/2007-09-29-GC.ll index 789a0fe1ed..9aefd0b041 100644 --- a/test/Assembler/2007-09-29-GC.ll +++ b/test/Assembler/2007-09-29-GC.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llvm-dis | grep {@f.*gc.*shadowstack} -; RUN: llvm-as < %s | llvm-dis | grep {@g.*gc.*java} +; RUN: llvm-as < %s | llvm-dis | grep "@f.*gc.*shadowstack" +; RUN: llvm-as < %s | llvm-dis | grep "@g.*gc.*java" define void @f() gc "shadowstack" { entry: diff --git a/test/Assembler/2007-12-11-AddressSpaces.ll b/test/Assembler/2007-12-11-AddressSpaces.ll index 0eb4a79730..7c9b5b5298 100644 --- a/test/Assembler/2007-12-11-AddressSpaces.ll +++ b/test/Assembler/2007-12-11-AddressSpaces.ll @@ -1,8 +1,8 @@ -; RUN: llvm-as < %s | llvm-dis | grep {addrspace(33)} | count 7 -; RUN: llvm-as < %s | llvm-dis | grep {addrspace(42)} | count 2 -; RUN: llvm-as < %s | llvm-dis | grep {addrspace(66)} | count 2 -; RUN: llvm-as < %s | llvm-dis | grep {addrspace(11)} | count 6 -; RUN: llvm-as < %s | llvm-dis | grep {addrspace(22)} | count 5 +; RUN: llvm-as < %s | llvm-dis | grep "addrspace(33)" | count 7 +; RUN: llvm-as < %s | llvm-dis | grep "addrspace(42)" | count 2 +; RUN: llvm-as < %s | llvm-dis | grep "addrspace(66)" | count 2 +; RUN: llvm-as < %s | llvm-dis | grep "addrspace(11)" | count 6 +; RUN: llvm-as < %s | llvm-dis | grep "addrspace(22)" | count 5 %struct.mystruct = type { i32, i32 addrspace(33)*, i32, i32 addrspace(33)* } @input = weak addrspace(42) global %struct.mystruct zeroinitializer ; <%struct.mystruct addrspace(42)*> [#uses=1] 
diff --git a/test/Assembler/2008-02-18-IntPointerCrash.ll b/test/Assembler/2008-02-18-IntPointerCrash.ll index 5a661ad9b9..4a33c36d57 100644 --- a/test/Assembler/2008-02-18-IntPointerCrash.ll +++ b/test/Assembler/2008-02-18-IntPointerCrash.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s |& grep {integer constant must have integer type} +; RUN: not llvm-as %s 2>&1 | grep "integer constant must have integer type" ; PR2060 define i8* @foo() { diff --git a/test/Assembler/2008-09-02-FunctionNotes2.ll b/test/Assembler/2008-09-02-FunctionNotes2.ll index 8a49e89902..97351e2a57 100644 --- a/test/Assembler/2008-09-02-FunctionNotes2.ll +++ b/test/Assembler/2008-09-02-FunctionNotes2.ll @@ -1,5 +1,5 @@ ; Test function notes -; RUN: not llvm-as %s -o /dev/null |& grep "Attributes noinline alwaysinline are incompatible" +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Attributes noinline alwaysinline are incompatible" define void @fn1() alwaysinline noinline { ret void } diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll index d3d374a07c..fc18ce708e 100644 --- a/test/Assembler/ConstantExprFold.ll +++ b/test/Assembler/ConstantExprFold.ll @@ -1,7 +1,7 @@ ; This test checks to make sure that constant exprs fold in some simple ; situations -; RUN: llvm-as < %s | llvm-dis | not grep {(} +; RUN: llvm-as < %s | llvm-dis | not grep "(" @A = global i64 0 diff --git a/test/Assembler/extractvalue-invalid-idx.ll b/test/Assembler/extractvalue-invalid-idx.ll index 9a215f7194..b5a398c2cc 100644 --- a/test/Assembler/extractvalue-invalid-idx.ll +++ b/test/Assembler/extractvalue-invalid-idx.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& FileCheck %s +; RUN: not llvm-as < %s 2>&1 | FileCheck %s ; PR4170 ; CHECK: invalid indices for extractvalue diff --git a/test/Assembler/getelementptr_struct.ll b/test/Assembler/getelementptr_struct.ll index bfebf29bd5..0293672233 100644 --- a/test/Assembler/getelementptr_struct.ll +++ b/test/Assembler/getelementptr_struct.ll @@ -1,4 
+1,5 @@ -; RUN: not llvm-as < %s >/dev/null |& FileCheck %s +; RUN: not llvm-as < %s >/dev/null 2> %t +; RUN: FileCheck %s < %t ; Test the case of a incorrect indices type into struct ; CHECK: invalid getelementptr indices diff --git a/test/Assembler/insertvalue-invalid-idx.ll b/test/Assembler/insertvalue-invalid-idx.ll index 355d4e8c10..74642f4eb0 100644 --- a/test/Assembler/insertvalue-invalid-idx.ll +++ b/test/Assembler/insertvalue-invalid-idx.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& FileCheck %s +; RUN: not llvm-as < %s 2>&1 | FileCheck %s ; CHECK: invalid indices for insertvalue diff --git a/test/Assembler/invalid_cast.ll b/test/Assembler/invalid_cast.ll index f682835724..91e81c78eb 100644 --- a/test/Assembler/invalid_cast.ll +++ b/test/Assembler/invalid_cast.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& FileCheck %s +; RUN: not llvm-as < %s 2>&1 | FileCheck %s ; CHECK: invalid cast opcode for cast from '<4 x i64>' to '<3 x i8>' diff --git a/test/Assembler/invalid_cast2.ll b/test/Assembler/invalid_cast2.ll index a01b935629..5ce9546deb 100644 --- a/test/Assembler/invalid_cast2.ll +++ b/test/Assembler/invalid_cast2.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& FileCheck %s +; RUN: not llvm-as < %s 2>&1 | FileCheck %s ; CHECK: invalid cast opcode for cast from '<4 x i64>' to 'i8' diff --git a/test/Assembler/tls-models.ll b/test/Assembler/tls-models.ll new file mode 100644 index 0000000000..42f24962ae --- /dev/null +++ b/test/Assembler/tls-models.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +; CHECK: @a = thread_local global i32 0 +; CHECK: @b = thread_local(localdynamic) global i32 0 +; CHECK: @c = thread_local(initialexec) global i32 0 +; CHECK: @d = thread_local(localexec) global i32 0 + +@a = thread_local global i32 0 +@b = thread_local(localdynamic) global i32 0 +@c = thread_local(initialexec) global i32 0 +@d = thread_local(localexec) global i32 0 diff --git a/test/Bindings/Ocaml/vmcore.ml 
b/test/Bindings/Ocaml/vmcore.ml index 9329286286..b8eb6d3e3d 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -84,7 +84,7 @@ let test_target () = (*===-- Constants ---------------------------------------------------------===*) let test_constants () = - (* RUN: grep {const_int.*i32.*-1} < %t.ll + (* RUN: grep "const_int.*i32.*-1" < %t.ll *) group "int"; let c = const_int i32_type (-1) in @@ -92,44 +92,44 @@ let test_constants () = insist (i32_type = type_of c); insist (is_constant c); - (* RUN: grep {const_sext_int.*i64.*-1} < %t.ll + (* RUN: grep "const_sext_int.*i64.*-1" < %t.ll *) group "sext int"; let c = const_int i64_type (-1) in ignore (define_global "const_sext_int" c m); insist (i64_type = type_of c); - (* RUN: grep {const_zext_int64.*i64.*4294967295} < %t.ll + (* RUN: grep "const_zext_int64.*i64.*4294967295" < %t.ll *) group "zext int64"; let c = const_of_int64 i64_type (Int64.of_string "4294967295") false in ignore (define_global "const_zext_int64" c m); insist (i64_type = type_of c); - (* RUN: grep {const_int_string.*i32.*-1} < %t.ll + (* RUN: grep "const_int_string.*i32.*-1" < %t.ll *) group "int string"; let c = const_int_of_string i32_type "-1" 10 in ignore (define_global "const_int_string" c m); insist (i32_type = type_of c); - (* RUN: grep {const_string.*"cruel\\\\00world"} < %t.ll + (* RUN: grep 'const_string.*"cruel\00world"' < %t.ll *) group "string"; let c = const_string context "cruel\000world" in ignore (define_global "const_string" c m); insist ((array_type i8_type 11) = type_of c); - (* RUN: grep {const_stringz.*"hi\\\\00again\\\\00"} < %t.ll + (* RUN: grep 'const_stringz.*"hi\00again\00"' < %t.ll *) group "stringz"; let c = const_stringz context "hi\000again" in ignore (define_global "const_stringz" c m); insist ((array_type i8_type 9) = type_of c); - (* RUN: grep {const_single.*2.75} < %t.ll - * RUN: grep {const_double.*3.1459} < %t.ll - * RUN: grep {const_double_string.*1.25} < %t.ll + (* RUN: grep 
"const_single.*2.75" < %t.ll + * RUN: grep "const_double.*3.1459" < %t.ll + * RUN: grep "const_double_string.*1.25" < %t.ll *) begin group "real"; let cs = const_float float_type 2.75 in @@ -150,14 +150,14 @@ let test_constants () = let three = const_int i32_type 3 in let four = const_int i32_type 4 in - (* RUN: grep {const_array.*\\\[i32 3, i32 4\\\]} < %t.ll + (* RUN: grep "const_array.*[i32 3, i32 4]" < %t.ll *) group "array"; let c = const_array i32_type [| three; four |] in ignore (define_global "const_array" c m); insist ((array_type i32_type 2) = (type_of c)); - (* RUN: grep {const_vector.*<i16 1, i16 2.*>} < %t.ll + (* RUN: grep "const_vector.*<i16 1, i16 2.*>" < %t.ll *) group "vector"; let c = const_vector [| one; two; one; two; @@ -165,7 +165,7 @@ let test_constants () = ignore (define_global "const_vector" c m); insist ((vector_type i16_type 8) = (type_of c)); - (* RUN: grep {const_structure.*.i16 1, i16 2, i32 3, i32 4} < %t.ll + (* RUN: grep "const_structure.*.i16 1, i16 2, i32 3, i32 4" < %t.ll *) group "structure"; let c = const_struct context [| one; two; three; four |] in @@ -173,27 +173,27 @@ let test_constants () = insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |]) = (type_of c)); - (* RUN: grep {const_null.*zeroinit} < %t.ll + (* RUN: grep "const_null.*zeroinit" < %t.ll *) group "null"; let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type; double_type |]) in ignore (define_global "const_null" c m); - (* RUN: grep {const_all_ones.*-1} < %t.ll + (* RUN: grep "const_all_ones.*-1" < %t.ll *) group "all ones"; let c = const_all_ones i64_type in ignore (define_global "const_all_ones" c m); group "pointer null"; begin - (* RUN: grep {const_pointer_null = global i64\\* null} < %t.ll + (* RUN: grep "const_pointer_null = global i64* null" < %t.ll *) let c = const_pointer_null (pointer_type i64_type) in ignore (define_global "const_pointer_null" c m); end; - (* RUN: grep {const_undef.*undef} < %t.ll + (* 
RUN: grep "const_undef.*undef" < %t.ll *) group "undef"; let c = undef i1_type in @@ -202,35 +202,35 @@ let test_constants () = insist (is_undef c); group "constant arithmetic"; - (* RUN: grep {@const_neg = global i64 sub} < %t.ll - * RUN: grep {@const_nsw_neg = global i64 sub nsw } < %t.ll - * RUN: grep {@const_nuw_neg = global i64 sub nuw } < %t.ll - * RUN: grep {@const_fneg = global double fsub } < %t.ll - * RUN: grep {@const_not = global i64 xor } < %t.ll - * RUN: grep {@const_add = global i64 add } < %t.ll - * RUN: grep {@const_nsw_add = global i64 add nsw } < %t.ll - * RUN: grep {@const_nuw_add = global i64 add nuw } < %t.ll - * RUN: grep {@const_fadd = global double fadd } < %t.ll - * RUN: grep {@const_sub = global i64 sub } < %t.ll - * RUN: grep {@const_nsw_sub = global i64 sub nsw } < %t.ll - * RUN: grep {@const_nuw_sub = global i64 sub nuw } < %t.ll - * RUN: grep {@const_fsub = global double fsub } < %t.ll - * RUN: grep {@const_mul = global i64 mul } < %t.ll - * RUN: grep {@const_nsw_mul = global i64 mul nsw } < %t.ll - * RUN: grep {@const_nuw_mul = global i64 mul nuw } < %t.ll - * RUN: grep {@const_fmul = global double fmul } < %t.ll - * RUN: grep {@const_udiv = global i64 udiv } < %t.ll - * RUN: grep {@const_sdiv = global i64 sdiv } < %t.ll - * RUN: grep {@const_exact_sdiv = global i64 sdiv exact } < %t.ll - * RUN: grep {@const_fdiv = global double fdiv } < %t.ll - * RUN: grep {@const_urem = global i64 urem } < %t.ll - * RUN: grep {@const_srem = global i64 srem } < %t.ll - * RUN: grep {@const_frem = global double frem } < %t.ll - * RUN: grep {@const_and = global i64 and } < %t.ll - * RUN: grep {@const_or = global i64 or } < %t.ll - * RUN: grep {@const_xor = global i64 xor } < %t.ll - * RUN: grep {@const_icmp = global i1 icmp sle } < %t.ll - * RUN: grep {@const_fcmp = global i1 fcmp ole } < %t.ll + (* RUN: grep "@const_neg = global i64 sub" < %t.ll + * RUN: grep "@const_nsw_neg = global i64 sub nsw " < %t.ll + * RUN: grep "@const_nuw_neg = global i64 sub 
nuw " < %t.ll + * RUN: grep "@const_fneg = global double fsub " < %t.ll + * RUN: grep "@const_not = global i64 xor " < %t.ll + * RUN: grep "@const_add = global i64 add " < %t.ll + * RUN: grep "@const_nsw_add = global i64 add nsw " < %t.ll + * RUN: grep "@const_nuw_add = global i64 add nuw " < %t.ll + * RUN: grep "@const_fadd = global double fadd " < %t.ll + * RUN: grep "@const_sub = global i64 sub " < %t.ll + * RUN: grep "@const_nsw_sub = global i64 sub nsw " < %t.ll + * RUN: grep "@const_nuw_sub = global i64 sub nuw " < %t.ll + * RUN: grep "@const_fsub = global double fsub " < %t.ll + * RUN: grep "@const_mul = global i64 mul " < %t.ll + * RUN: grep "@const_nsw_mul = global i64 mul nsw " < %t.ll + * RUN: grep "@const_nuw_mul = global i64 mul nuw " < %t.ll + * RUN: grep "@const_fmul = global double fmul " < %t.ll + * RUN: grep "@const_udiv = global i64 udiv " < %t.ll + * RUN: grep "@const_sdiv = global i64 sdiv " < %t.ll + * RUN: grep "@const_exact_sdiv = global i64 sdiv exact " < %t.ll + * RUN: grep "@const_fdiv = global double fdiv " < %t.ll + * RUN: grep "@const_urem = global i64 urem " < %t.ll + * RUN: grep "@const_srem = global i64 srem " < %t.ll + * RUN: grep "@const_frem = global double frem " < %t.ll + * RUN: grep "@const_and = global i64 and " < %t.ll + * RUN: grep "@const_or = global i64 or " < %t.ll + * RUN: grep "@const_xor = global i64 xor " < %t.ll + * RUN: grep "@const_icmp = global i1 icmp sle " < %t.ll + * RUN: grep "@const_fcmp = global i1 fcmp ole " < %t.ll *) let void_ptr = pointer_type i8_type in let five = const_int i64_type 5 in @@ -269,18 +269,18 @@ let test_constants () = ignore (define_global "const_fcmp" (const_fcmp Fcmp.Ole ffoldbomb ffive) m); group "constant casts"; - (* RUN: grep {const_trunc.*trunc} < %t.ll - * RUN: grep {const_sext.*sext} < %t.ll - * RUN: grep {const_zext.*zext} < %t.ll - * RUN: grep {const_fptrunc.*fptrunc} < %t.ll - * RUN: grep {const_fpext.*fpext} < %t.ll - * RUN: grep {const_uitofp.*uitofp} < %t.ll - * RUN: grep 
{const_sitofp.*sitofp} < %t.ll - * RUN: grep {const_fptoui.*fptoui} < %t.ll - * RUN: grep {const_fptosi.*fptosi} < %t.ll - * RUN: grep {const_ptrtoint.*ptrtoint} < %t.ll - * RUN: grep {const_inttoptr.*inttoptr} < %t.ll - * RUN: grep {const_bitcast.*bitcast} < %t.ll + (* RUN: grep "const_trunc.*trunc" < %t.ll + * RUN: grep "const_sext.*sext" < %t.ll + * RUN: grep "const_zext.*zext" < %t.ll + * RUN: grep "const_fptrunc.*fptrunc" < %t.ll + * RUN: grep "const_fpext.*fpext" < %t.ll + * RUN: grep "const_uitofp.*uitofp" < %t.ll + * RUN: grep "const_sitofp.*sitofp" < %t.ll + * RUN: grep "const_fptoui.*fptoui" < %t.ll + * RUN: grep "const_fptosi.*fptosi" < %t.ll + * RUN: grep "const_ptrtoint.*ptrtoint" < %t.ll + * RUN: grep "const_inttoptr.*inttoptr" < %t.ll + * RUN: grep "const_bitcast.*bitcast" < %t.ll *) let i128_type = integer_type context 128 in ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five) @@ -302,12 +302,12 @@ let test_constants () = ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m); group "misc constants"; - (* RUN: grep {const_size_of.*getelementptr.*null} < %t.ll - * RUN: grep {const_gep.*getelementptr} < %t.ll - * RUN: grep {const_select.*select} < %t.ll - * RUN: grep {const_extractelement.*extractelement} < %t.ll - * RUN: grep {const_insertelement.*insertelement} < %t.ll - * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll + (* RUN: grep "const_size_of.*getelementptr.*null" < %t.ll + * RUN: grep "const_gep.*getelementptr" < %t.ll + * RUN: grep "const_select.*select" < %t.ll + * RUN: grep "const_extractelement.*extractelement" < %t.ll + * RUN: grep "const_insertelement.*insertelement" < %t.ll + * RUN: grep "const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>" < %t.ll *) ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m); ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m); @@ -356,7 +356,7 @@ let 
test_global_values () = let (++) x f = f x; x in let zero32 = const_null i32_type in - (* RUN: grep {GVal01} < %t.ll + (* RUN: grep "GVal01" < %t.ll *) group "naming"; let g = define_global "TEMPORARY" zero32 m in @@ -364,28 +364,28 @@ let test_global_values () = set_value_name "GVal01" g; insist ("GVal01" = value_name g); - (* RUN: grep {GVal02.*linkonce} < %t.ll + (* RUN: grep "GVal02.*linkonce" < %t.ll *) group "linkage"; let g = define_global "GVal02" zero32 m ++ set_linkage Linkage.Link_once in insist (Linkage.Link_once = linkage g); - (* RUN: grep {GVal03.*Hanalei} < %t.ll + (* RUN: grep "GVal03.*Hanalei" < %t.ll *) group "section"; let g = define_global "GVal03" zero32 m ++ set_section "Hanalei" in insist ("Hanalei" = section g); - (* RUN: grep {GVal04.*hidden} < %t.ll + (* RUN: grep "GVal04.*hidden" < %t.ll *) group "visibility"; let g = define_global "GVal04" zero32 m ++ set_visibility Visibility.Hidden in insist (Visibility.Hidden = visibility g); - (* RUN: grep {GVal05.*align 128} < %t.ll + (* RUN: grep "GVal05.*align 128" < %t.ll *) group "alignment"; let g = define_global "GVal05" zero32 m ++ @@ -400,7 +400,7 @@ let test_global_variables () = let fourty_two32 = const_int i32_type 42 in group "declarations"; begin - (* RUN: grep {GVar01.*external} < %t.ll + (* RUN: grep "GVar01.*external" < %t.ll *) insist (None == lookup_global "GVar01" m); let g = declare_global i32_type "GVar01" m in @@ -422,8 +422,8 @@ let test_global_variables () = end; group "definitions"; begin - (* RUN: grep {GVar02.*42} < %t.ll - * RUN: grep {GVar03.*42} < %t.ll + (* RUN: grep "GVar02.*42" < %t.ll + * RUN: grep "GVar03.*42" < %t.ll *) let g = define_global "GVar02" fourty_two32 m in let g2 = declare_global i32_type "GVar03" m ++ @@ -440,20 +440,20 @@ let test_global_variables () = insist ((global_initializer g) == (global_initializer g2)); end; - (* RUN: grep {GVar04.*thread_local} < %t.ll + (* RUN: grep "GVar04.*thread_local" < %t.ll *) group "threadlocal"; let g = 
define_global "GVar04" fourty_two32 m ++ set_thread_local true in insist (is_thread_local g); - (* RUN: grep -v {GVar05} < %t.ll + (* RUN: grep -v "GVar05" < %t.ll *) group "delete"; let g = define_global "GVar05" fourty_two32 m in delete_global g; - (* RUN: grep -v {ConstGlobalVar.*constant} < %t.ll + (* RUN: grep -v "ConstGlobalVar.*constant" < %t.ll *) group "constant"; let g = define_global "ConstGlobalVar" fourty_two32 m in @@ -542,7 +542,7 @@ let test_users () = (*===-- Aliases -----------------------------------------------------------===*) let test_aliases () = - (* RUN: grep {@alias = alias i32\\* @aliasee} < %t.ll + (* RUN: grep "@alias = alias i32* @aliasee" < %t.ll *) let v = declare_global i32_type "aliasee" m in ignore (add_alias m (pointer_type i32_type) v "alias") @@ -554,7 +554,7 @@ let test_functions () = let ty = function_type i32_type [| i32_type; i64_type |] in let ty2 = function_type i8_type [| i8_type; i64_type |] in - (* RUN: grep {declare i32 @Fn1\(i32, i64\)} < %t.ll + (* RUN: grep "declare i32 @Fn1\(i32, i64\)" < %t.ll *) begin group "declare"; insist (None = lookup_function "Fn1" m); @@ -570,13 +570,13 @@ let test_functions () = insist (m == global_parent fn) end; - (* RUN: grep -v {Fn2} < %t.ll + (* RUN: grep -v "Fn2" < %t.ll *) group "delete"; let fn = declare_function "Fn2" ty m in delete_function fn; - (* RUN: grep {define.*Fn3} < %t.ll + (* RUN: grep "define.*Fn3" < %t.ll *) group "define"; let fn = define_function "Fn3" ty m in @@ -584,7 +584,7 @@ let test_functions () = insist (1 = Array.length (basic_blocks fn)); ignore (build_unreachable (builder_at_end context (entry_block fn))); - (* RUN: grep {define.*Fn4.*Param1.*Param2} < %t.ll + (* RUN: grep "define.*Fn4.*Param1.*Param2" < %t.ll *) group "params"; let fn = define_function "Fn4" ty m in @@ -598,7 +598,7 @@ let test_functions () = set_value_name "Param2" params.(1); ignore (build_unreachable (builder_at_end context (entry_block fn))); - (* RUN: grep {fastcc.*Fn5} < %t.ll + 
(* RUN: grep "fastcc.*Fn5" < %t.ll *) group "callconv"; let fn = define_function "Fn5" ty m in @@ -608,7 +608,7 @@ let test_functions () = ignore (build_unreachable (builder_at_end context (entry_block fn))); begin group "gc"; - (* RUN: grep {Fn6.*gc.*shadowstack} < %t.ll + (* RUN: grep "Fn6.*gc.*shadowstack" < %t.ll *) let fn = define_function "Fn6" ty m in insist (None = gc fn); @@ -694,7 +694,7 @@ let test_params () = let test_basic_blocks () = let ty = function_type void_type [| |] in - (* RUN: grep {Bb1} < %t.ll + (* RUN: grep "Bb1" < %t.ll *) group "entry"; let fn = declare_function "X" ty m in @@ -825,7 +825,7 @@ let test_builder () = group "ret void"; begin - (* RUN: grep {ret void} < %t.ll + (* RUN: grep "ret void" < %t.ll *) let fty = function_type void_type [| |] in let fn = declare_function "X6" fty m in @@ -835,7 +835,7 @@ let test_builder () = group "ret aggregate"; begin - (* RUN: grep {ret \{ i8, i64 \} \{ i8 4, i64 5 \}} < %t.ll + (* RUN: grep "ret { i8, i64 } { i8 4, i64 5 }" < %t.ll *) let sty = struct_type context [| i8_type; i64_type |] in let fty = function_type sty [| |] in @@ -895,14 +895,14 @@ let test_builder () = end; group "ret"; begin - (* RUN: grep {ret.*P1} < %t.ll + (* RUN: grep "ret.*P1" < %t.ll *) let ret = build_ret p1 atentry in position_before ret atentry end; group "br"; begin - (* RUN: grep {br.*Bb02} < %t.ll + (* RUN: grep "br.*Bb02" < %t.ll *) let bb02 = append_block context "Bb02" fn in let b = builder_at_end context bb02 in @@ -910,7 +910,7 @@ let test_builder () = end; group "cond_br"; begin - (* RUN: grep {br.*build_br.*Bb03.*Bb00} < %t.ll + (* RUN: grep "br.*build_br.*Bb03.*Bb00" < %t.ll *) let bb03 = append_block context "Bb03" fn in let b = builder_at_end context bb03 in @@ -919,8 +919,8 @@ let test_builder () = end; group "switch"; begin - (* RUN: grep {switch.*P1.*SwiBlock3} < %t.ll - * RUN: grep {2,.*SwiBlock2} < %t.ll + (* RUN: grep "switch.*P1.*SwiBlock3" < %t.ll + * RUN: grep "2,.*SwiBlock2" < %t.ll *) let bb1 = 
append_block context "SwiBlock1" fn in let bb2 = append_block context "SwiBlock2" fn in @@ -934,9 +934,9 @@ let test_builder () = end; group "malloc/free"; begin - (* RUN: grep {call.*@malloc(i32 ptrtoint} < %t.ll - * RUN: grep {call.*@free(i8\*} < %t.ll - * RUN: grep {call.*@malloc(i32 %} < %t.ll + (* RUN: grep "call.*@malloc(i32 ptrtoint" < %t.ll + * RUN: grep "call.*@free(i8*" < %t.ll + * RUN: grep "call.*@malloc(i32 %" < %t.ll *) let bb1 = append_block context "MallocBlock1" fn in let m1 = (build_malloc (pointer_type i32_type) "m1" @@ -947,7 +947,7 @@ let test_builder () = end; group "indirectbr"; begin - (* RUN: grep {indirectbr i8\\* blockaddress(@X7, %IBRBlock2), \\\[label %IBRBlock2, label %IBRBlock3\\\]} < %t.ll + (* RUN: grep "indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]" < %t.ll *) let bb1 = append_block context "IBRBlock1" fn in @@ -964,8 +964,8 @@ let test_builder () = end; group "invoke"; begin - (* RUN: grep {build_invoke.*invoke.*P1.*P2} < %t.ll - * RUN: grep {to.*Bb04.*unwind.*Bblpad} < %t.ll + (* RUN: grep "build_invoke.*invoke.*P1.*P2" < %t.ll + * RUN: grep "to.*Bb04.*unwind.*Bblpad" < %t.ll *) let bb04 = append_block context "Bb04" fn in let b = builder_at_end context bb04 in @@ -973,7 +973,7 @@ let test_builder () = end; group "unreachable"; begin - (* RUN: grep {unreachable} < %t.ll + (* RUN: grep "unreachable" < %t.ll *) let bb06 = append_block context "Bb06" fn in let b = builder_at_end context bb06 in @@ -984,36 +984,36 @@ let test_builder () = let bb07 = append_block context "Bb07" fn in let b = builder_at_end context bb07 in - (* RUN: grep {%build_add = add i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nsw_add = add nsw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nuw_add = add nuw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_fadd = fadd float %F1, %F2} < %t.ll - * RUN: grep {%build_sub = sub i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nsw_sub = sub nsw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nuw_sub 
= sub nuw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_fsub = fsub float %F1, %F2} < %t.ll - * RUN: grep {%build_mul = mul i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nsw_mul = mul nsw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_nuw_mul = mul nuw i32 %P1, %P2} < %t.ll - * RUN: grep {%build_fmul = fmul float %F1, %F2} < %t.ll - * RUN: grep {%build_udiv = udiv i32 %P1, %P2} < %t.ll - * RUN: grep {%build_sdiv = sdiv i32 %P1, %P2} < %t.ll - * RUN: grep {%build_exact_sdiv = sdiv exact i32 %P1, %P2} < %t.ll - * RUN: grep {%build_fdiv = fdiv float %F1, %F2} < %t.ll - * RUN: grep {%build_urem = urem i32 %P1, %P2} < %t.ll - * RUN: grep {%build_srem = srem i32 %P1, %P2} < %t.ll - * RUN: grep {%build_frem = frem float %F1, %F2} < %t.ll - * RUN: grep {%build_shl = shl i32 %P1, %P2} < %t.ll - * RUN: grep {%build_lshl = lshr i32 %P1, %P2} < %t.ll - * RUN: grep {%build_ashl = ashr i32 %P1, %P2} < %t.ll - * RUN: grep {%build_and = and i32 %P1, %P2} < %t.ll - * RUN: grep {%build_or = or i32 %P1, %P2} < %t.ll - * RUN: grep {%build_xor = xor i32 %P1, %P2} < %t.ll - * RUN: grep {%build_neg = sub i32 0, %P1} < %t.ll - * RUN: grep {%build_nsw_neg = sub nsw i32 0, %P1} < %t.ll - * RUN: grep {%build_nuw_neg = sub nuw i32 0, %P1} < %t.ll - * RUN: grep {%build_fneg = fsub float .*0.*, %F1} < %t.ll - * RUN: grep {%build_not = xor i32 %P1, -1} < %t.ll + (* RUN: grep "%build_add = add i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nsw_add = add nsw i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nuw_add = add nuw i32 %P1, %P2" < %t.ll + * RUN: grep "%build_fadd = fadd float %F1, %F2" < %t.ll + * RUN: grep "%build_sub = sub i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nsw_sub = sub nsw i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nuw_sub = sub nuw i32 %P1, %P2" < %t.ll + * RUN: grep "%build_fsub = fsub float %F1, %F2" < %t.ll + * RUN: grep "%build_mul = mul i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nsw_mul = mul nsw i32 %P1, %P2" < %t.ll + * RUN: grep "%build_nuw_mul = mul nuw i32 %P1, %P2" < %t.ll + * 
RUN: grep "%build_fmul = fmul float %F1, %F2" < %t.ll + * RUN: grep "%build_udiv = udiv i32 %P1, %P2" < %t.ll + * RUN: grep "%build_sdiv = sdiv i32 %P1, %P2" < %t.ll + * RUN: grep "%build_exact_sdiv = sdiv exact i32 %P1, %P2" < %t.ll + * RUN: grep "%build_fdiv = fdiv float %F1, %F2" < %t.ll + * RUN: grep "%build_urem = urem i32 %P1, %P2" < %t.ll + * RUN: grep "%build_srem = srem i32 %P1, %P2" < %t.ll + * RUN: grep "%build_frem = frem float %F1, %F2" < %t.ll + * RUN: grep "%build_shl = shl i32 %P1, %P2" < %t.ll + * RUN: grep "%build_lshl = lshr i32 %P1, %P2" < %t.ll + * RUN: grep "%build_ashl = ashr i32 %P1, %P2" < %t.ll + * RUN: grep "%build_and = and i32 %P1, %P2" < %t.ll + * RUN: grep "%build_or = or i32 %P1, %P2" < %t.ll + * RUN: grep "%build_xor = xor i32 %P1, %P2" < %t.ll + * RUN: grep "%build_neg = sub i32 0, %P1" < %t.ll + * RUN: grep "%build_nsw_neg = sub nsw i32 0, %P1" < %t.ll + * RUN: grep "%build_nuw_neg = sub nuw i32 0, %P1" < %t.ll + * RUN: grep "%build_fneg = fsub float .*0.*, %F1" < %t.ll + * RUN: grep "%build_not = xor i32 %P1, -1" < %t.ll *) ignore (build_add p1 p2 "build_add" b); ignore (build_nsw_add p1 p2 "build_nsw_add" b); @@ -1052,13 +1052,13 @@ let test_builder () = let bb08 = append_block context "Bb08" fn in let b = builder_at_end context bb08 in - (* RUN: grep {%build_alloca = alloca i32} < %t.ll - * RUN: grep {%build_array_alloca = alloca i32, i32 %P2} < %t.ll - * RUN: grep {%build_load = load i32\\* %build_array_alloca} < %t.ll - * RUN: grep {store i32 %P2, i32\\* %build_alloca} < %t.ll - * RUN: grep {%build_gep = getelementptr i32\\* %build_array_alloca, i32 %P2} < %t.ll - * RUN: grep {%build_in_bounds_gep = getelementptr inbounds i32\\* %build_array_alloca, i32 %P2} < %t.ll - * RUN: grep {%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1} < %t.ll + (* RUN: grep "%build_alloca = alloca i32" < %t.ll + * RUN: grep "%build_array_alloca = alloca i32, i32 %P2" < %t.ll + * RUN: grep "%build_load = load i32* 
%build_array_alloca" < %t.ll + * RUN: grep "store i32 %P2, i32* %build_alloca" < %t.ll + * RUN: grep "%build_gep = getelementptr i32* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1" < %t.ll *) let alloca = build_alloca i32_type "build_alloca" b in let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in @@ -1090,30 +1090,30 @@ let test_builder () = group "casts"; begin let void_ptr = pointer_type i8_type in - (* RUN: grep {%build_trunc = trunc i32 %P1 to i8} < %t.ll - * RUN: grep {%build_trunc2 = trunc i32 %P1 to i8} < %t.ll - * RUN: grep {%build_trunc3 = trunc i32 %P1 to i8} < %t.ll - * RUN: grep {%build_zext = zext i8 %build_trunc to i32} < %t.ll - * RUN: grep {%build_zext2 = zext i8 %build_trunc to i32} < %t.ll - * RUN: grep {%build_sext = sext i32 %build_zext to i64} < %t.ll - * RUN: grep {%build_sext2 = sext i32 %build_zext to i64} < %t.ll - * RUN: grep {%build_sext3 = sext i32 %build_zext to i64} < %t.ll - * RUN: grep {%build_uitofp = uitofp i64 %build_sext to float} < %t.ll - * RUN: grep {%build_sitofp = sitofp i32 %build_zext to double} < %t.ll - * RUN: grep {%build_fptoui = fptoui float %build_uitofp to i32} < %t.ll - * RUN: grep {%build_fptosi = fptosi double %build_sitofp to i64} < %t.ll - * RUN: grep {%build_fptrunc = fptrunc double %build_sitofp to float} < %t.ll - * RUN: grep {%build_fptrunc2 = fptrunc double %build_sitofp to float} < %t.ll - * RUN: grep {%build_fpext = fpext float %build_fptrunc to double} < %t.ll - * RUN: grep {%build_fpext2 = fpext float %build_fptrunc to double} < %t.ll - * RUN: grep {%build_inttoptr = inttoptr i32 %P1 to i8\\*} < %t.ll - * RUN: grep {%build_ptrtoint = ptrtoint i8\\* %build_inttoptr to i64} < %t.ll - * RUN: grep {%build_ptrtoint2 = ptrtoint i8\\* %build_inttoptr to i64} < %t.ll - * RUN: grep {%build_bitcast = 
bitcast i64 %build_ptrtoint to double} < %t.ll - * RUN: grep {%build_bitcast2 = bitcast i64 %build_ptrtoint to double} < %t.ll - * RUN: grep {%build_bitcast3 = bitcast i64 %build_ptrtoint to double} < %t.ll - * RUN: grep {%build_bitcast4 = bitcast i64 %build_ptrtoint to double} < %t.ll - * RUN: grep {%build_pointercast = bitcast i8\\* %build_inttoptr to i16\\*} < %t.ll + (* RUN: grep "%build_trunc = trunc i32 %P1 to i8" < %t.ll + * RUN: grep "%build_trunc2 = trunc i32 %P1 to i8" < %t.ll + * RUN: grep "%build_trunc3 = trunc i32 %P1 to i8" < %t.ll + * RUN: grep "%build_zext = zext i8 %build_trunc to i32" < %t.ll + * RUN: grep "%build_zext2 = zext i8 %build_trunc to i32" < %t.ll + * RUN: grep "%build_sext = sext i32 %build_zext to i64" < %t.ll + * RUN: grep "%build_sext2 = sext i32 %build_zext to i64" < %t.ll + * RUN: grep "%build_sext3 = sext i32 %build_zext to i64" < %t.ll + * RUN: grep "%build_uitofp = uitofp i64 %build_sext to float" < %t.ll + * RUN: grep "%build_sitofp = sitofp i32 %build_zext to double" < %t.ll + * RUN: grep "%build_fptoui = fptoui float %build_uitofp to i32" < %t.ll + * RUN: grep "%build_fptosi = fptosi double %build_sitofp to i64" < %t.ll + * RUN: grep "%build_fptrunc = fptrunc double %build_sitofp to float" < %t.ll + * RUN: grep "%build_fptrunc2 = fptrunc double %build_sitofp to float" < %t.ll + * RUN: grep "%build_fpext = fpext float %build_fptrunc to double" < %t.ll + * RUN: grep "%build_fpext2 = fpext float %build_fptrunc to double" < %t.ll + * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8*" < %t.ll + * RUN: grep "%build_ptrtoint = ptrtoint i8* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_bitcast = bitcast i64 %build_ptrtoint to double" < %t.ll + * RUN: grep "%build_bitcast2 = bitcast i64 %build_ptrtoint to double" < %t.ll + * RUN: grep "%build_bitcast3 = bitcast i64 %build_ptrtoint to double" < %t.ll + * RUN: grep "%build_bitcast4 = bitcast i64 
%build_ptrtoint to double" < %t.ll + * RUN: grep "%build_pointercast = bitcast i8* %build_inttoptr to i16*" < %t.ll *) let inst28 = build_trunc p1 i8_type "build_trunc" atentry in let inst29 = build_zext inst28 i32_type "build_zext" atentry in @@ -1143,13 +1143,13 @@ let test_builder () = end; group "comparisons"; begin - (* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll - * RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll - * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll - * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll - * RUN: grep {%build_is_null.*= icmp eq.*%X0,.*null} < %t.ll - * RUN: grep {%build_is_not_null = icmp ne i8\\* %X1, null} < %t.ll - * RUN: grep {%build_ptrdiff} < %t.ll + (* RUN: grep "%build_icmp_ne = icmp ne i32 %P1, %P2" < %t.ll + * RUN: grep "%build_icmp_sle = icmp sle i32 %P2, %P1" < %t.ll + * RUN: grep "%build_fcmp_false = fcmp false float %F1, %F2" < %t.ll + * RUN: grep "%build_fcmp_true = fcmp true float %F2, %F1" < %t.ll + * RUN: grep "%build_is_null.*= icmp eq.*%X0,.*null" < %t.ll + * RUN: grep "%build_is_not_null = icmp ne i8* %X1, null" < %t.ll + * RUN: grep "%build_ptrdiff" < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); ignore (build_icmp Icmp.Sle p2 p1 "build_icmp_sle" atentry); @@ -1165,14 +1165,14 @@ let test_builder () = end; group "miscellaneous"; begin - (* RUN: grep {%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)} < %t.ll - * RUN: grep {%build_select = select i1 %build_icmp, i32 %P1, i32 %P2} < %t.ll - * RUN: grep {%build_va_arg = va_arg i8\\*\\* null, i32} < %t.ll - * RUN: grep {%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2} < %t.ll - * RUN: grep {%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2} < %t.ll - * RUN: grep {%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>} < %t.ll - * RUN: grep {%build_insertvalue0 = insertvalue.*%bl, 
i32 1, 0} < %t.ll - * RUN: grep {%build_extractvalue = extractvalue.*%build_insertvalue1, 1} < %t.ll + (* RUN: grep "%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)" < %t.ll + * RUN: grep "%build_select = select i1 %build_icmp, i32 %P1, i32 %P2" < %t.ll + * RUN: grep "%build_va_arg = va_arg i8** null, i32" < %t.ll + * RUN: grep "%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2" < %t.ll + * RUN: grep "%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2" < %t.ll + * RUN: grep "%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>" < %t.ll + * RUN: grep "%build_insertvalue0 = insertvalue.*%bl, i32 1, 0" < %t.ll + * RUN: grep "%build_extractvalue = extractvalue.*%build_insertvalue1, 1" < %t.ll *) let ci = build_call fn [| p2; p1 |] "build_call" atentry in insist (CallConv.c = instruction_call_conv ci); @@ -1215,8 +1215,8 @@ let test_builder () = end; group "metadata"; begin - (* RUN: grep {%metadata = add i32 %P1, %P2, !test !0} < %t.ll - * RUN: grep {!0 = metadata !\{i32 1, metadata !"metadata test"\}} < %t.ll + (* RUN: grep '%metadata = add i32 %P1, %P2, !test !0' < %t.ll + * RUN: grep '!0 = metadata !{i32 1, metadata !"metadata test"}' < %t.ll *) let i = build_add p1 p2 "metadata" atentry in insist ((has_metadata i) = false); @@ -1240,8 +1240,8 @@ let test_builder () = end; group "dbg"; begin - (* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll - * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll + (* RUN: grep "%dbg = add i32 %P1, %P2, !dbg !1" < %t.ll + * RUN: grep "!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}" < %t.ll *) insist ((current_debug_location atentry) = None); @@ -1261,7 +1261,7 @@ let test_builder () = end; group "phi"; begin - (* RUN: grep {PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2} < %t.ll + (* RUN: grep "PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2" < %t.ll *) let b1 = append_block context "PhiBlock1" fn in let 
b2 = append_block context "PhiBlock2" fn in diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll index b972753da1..8502b0d55a 100644 --- a/test/Bitcode/null-type.ll +++ b/test/Bitcode/null-type.ll @@ -1,4 +1,5 @@ -; RUN: not llvm-dis < %s.bc > /dev/null |& FileCheck %s +; RUN: not llvm-dis < %s.bc > /dev/null 2> %t +; RUN: FileCheck %s < %t ; PR8494 ; CHECK: Invalid MODULE_CODE_FUNCTION record diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ebea47d691..09cb94f4bd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,84 +1,27 @@ -foreach(c ${LLVM_TARGETS_TO_BUILD}) - set(TARGETS_BUILT "${TARGETS_BUILT} ${c}") -endforeach(c) -set(TARGETS_TO_BUILD ${TARGETS_BUILT}) - -# FIXME: This won't work for project files, we need to use a --param. -set(LLVM_LIBS_DIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}") -set(SHLIBEXT "${LTDL_SHLIB_EXT}") - -set(SHLIBDIR "${LLVM_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}") - -if(BUILD_SHARED_LIBS) - set(LLVM_SHARED_LIBS_ENABLED "1") -else() - set(LLVM_SHARED_LIBS_ENABLED "0") -endif(BUILD_SHARED_LIBS) - -if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - set(SHLIBPATH_VAR "DYLD_LIBRARY_PATH") -else() # Default for all other unix like systems. - # CMake hardcodes the library locaction using rpath. - # Therefore LD_LIBRARY_PATH is not required to run binaries in the - # build dir. We pass it anyways. 
- set(SHLIBPATH_VAR "LD_LIBRARY_PATH") -endif() - -set(LIT_ARGS "${LLVM_LIT_ARGS}") -separate_arguments(LIT_ARGS) - -configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in - ${CMAKE_CURRENT_BINARY_DIR}/site.exp) - -MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit) - -# Configuration-time: See Unit/lit.site.cfg.in -set(LLVM_BUILD_MODE "%(build_mode)s") - -set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) -set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) -set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s") -set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) -set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED}) -set(SHLIBPATH_VAR ${SHLIBPATH_VAR}) - -if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE) - set(ENABLE_ASSERTIONS "1") -else() - set(ENABLE_ASSERTIONS "0") -endif() - -set(HOST_OS ${CMAKE_HOST_SYSTEM_NAME}) -set(HOST_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) - -configure_file( +configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg - @ONLY) -configure_file( + ) +configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg - @ONLY) - -add_custom_target(check - COMMAND ${PYTHON_EXECUTABLE} - ${LLVM_SOURCE_DIR}/utils/lit/lit.py - --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg - --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg - --param build_config=${CMAKE_CFG_INTDIR} - --param build_mode=${RUNTIME_BUILD_MODE} - ${LIT_ARGS} - ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Running LLVM regression tests") - -add_custom_target(check.deps) -add_dependencies(check check.deps) -add_dependencies(check.deps - UnitTests - BugpointPasses LLVMHello - llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump - llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj - macho-dump opt - FileCheck count not) -set_target_properties(check.deps PROPERTIES FOLDER "Tests") + ) + +add_lit_testsuite(check-llvm "Running the LLVM regression tests" + 
${CMAKE_CURRENT_BINARY_DIR} + PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg + DEPENDS UnitTests + BugpointPasses LLVMHello + llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump + llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj + macho-dump opt + FileCheck count not + ) +set_target_properties(check-llvm PROPERTIES FOLDER "Tests") + +# Setup a legacy alias for 'check-llvm'. This will likely change to be an +# alias for 'check-all' at some point in the future. +add_custom_target(check) +add_dependencies(check check-llvm) +set_target_properties(check PROPERTIES FOLDER "Tests") diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index 33f935e960..a63cdd46e2 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ -; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer +; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer ; | grep 35 define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) { diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll index b543c57e1a..8d3337c29f 100644 --- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll +++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep {add.*#0} +; RUN: llc < %s -march=arm | not grep "add.*#0" define i32 @foo() { entry: diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll index d2eb85d356..670048bf25 100644 --- 
a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll +++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | not grep {str.*\\!} +; RUN: llc < %s -march=arm | not grep "str.*\!" %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 } %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll index 352672274d..7342f69631 100644 --- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll +++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep {swi 107} +; RUN: llc < %s -march=arm | grep "swi 107" define i32 @_swilseek(i32) nounwind { entry: diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll index 5ce600d1a9..b21bb006e3 100644 --- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios4.0.0" diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index 872eca34ad..f1c85f1b41 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -60,8 +60,16 @@ for.end: ; preds = %entry ret void } +; Check that pseudo-expansion preserves <undef> flags. 
+define void @foo3(i8* %p) nounwind ssp { +entry: + tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) + ret void +} + declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind !0 = metadata !{metadata !"omnipotent char", metadata !1} !1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index 9ccff07d45..6da90897b9 100644 --- a/test/CodeGen/ARM/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4 +; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4 define i32 @t1(i32 %a) { %b = mul i32 %a, 9 diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll index 31c500756c..d668334f8d 100644 --- a/test/CodeGen/ARM/aliases.ll +++ b/test/CodeGen/ARM/aliases.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t -; RUN: grep { = } %t | count 5 +; RUN: grep " = " %t | count 5 ; RUN: grep globl %t | count 4 ; RUN: grep weak %t | count 1 diff --git a/test/CodeGen/ARM/bicZext.ll b/test/CodeGen/ARM/bicZext.ll new file mode 100644 index 0000000000..cf4b7ba0e0 --- /dev/null +++ b/test/CodeGen/ARM/bicZext.ll @@ -0,0 +1,19 @@ +; RUN: llc %s -o - | FileCheck %s +; ModuleID = 'bic.c' +target triple = "thumbv7-apple-ios3.0.0" + +define zeroext i16 @foo16(i16 zeroext %f) nounwind readnone optsize ssp { +entry: + ; CHECK: .thumb_func _foo16 + ; CHECK: {{bic[^#]*#3}} + %and = and i16 %f, -4 + ret i16 %and +} + +define i32 @foo32(i32 %f) nounwind readnone optsize ssp { +entry: + ; CHECK: .thumb_func _foo32 + ; CHECK: {{bic[^#]*#3}} + %and = and i32 %f, -4 + ret i32 %and +} diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll index efe29d857d..00b16888f3 100644 --- 
a/test/CodeGen/ARM/call_nolink.ll +++ b/test/CodeGen/ARM/call_nolink.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: not grep {bx lr} +; RUN: not grep "bx lr" %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } @r = external global [14 x i32] ; <[14 x i32]*> [#uses=4] diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll index 1d011be93c..62b9e4380b 100644 --- a/test/CodeGen/ARM/cse-libcalls.ll +++ b/test/CodeGen/ARM/cse-libcalls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1 +; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll index 49c4103757..7fbf8f4090 100644 --- a/test/CodeGen/ARM/divmod.ll +++ b/test/CodeGen/ARM/divmod.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { entry: @@ -56,3 +56,17 @@ bb1: declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind + +; rdar://11714607 +define i32 @howmany(i32 %x, i32 %y) nounwind { +entry: +; CHECK: howmany: +; CHECK: bl ___udivmodsi4 +; CHECK-NOT: ___udivsi3 + %rem = urem i32 %x, %y + %div = udiv i32 %x, %y + %not.cmp = icmp ne i32 %rem, 0 + %add = zext i1 %not.cmp to i32 + %cond = add i32 %add, %div + ret i32 %cond +} diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll index 3bf1ef4ad2..303d165de0 100644 --- a/test/CodeGen/ARM/fusedMAC.ll +++ b/test/CodeGen/ARM/fusedMAC.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s +; RUN: llc < %s -mtriple=armv7-eabi 
-mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s ; Check generated fused MAC and MLS. define double @fusedMACTest1(double %d1, double %d2, double %d3) { @@ -141,6 +141,15 @@ entry: ret double %tmp2 } +define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp { +; CHECK: test_fnms_f32 +; CHECK: vfnms.f32 + %tmp1 = load float* %c, align 4 + %tmp2 = fsub float -0.0, %tmp1 + %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone + ret float %tmp3 +} + define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: ; CHECK: test_fnms_f64 @@ -197,7 +206,19 @@ define float @test_fma_canonicalize(float %a, float %b) nounwind { ret float %ret } +; Check that very wide vector fma's can be split into legal fma's. +define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp { +; CHECK: test_fma_v8f32 +; CHECK: vfma.f32 +; CHECK: vfma.f32 +entry: + %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone + store <8 x float> %call, <8 x float>* %p, align 16 + ret void +} + declare float @llvm.fma.f32(float, float, float) nounwind readnone declare double @llvm.fma.f64(double, double, double) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone +declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll index 89e309d160..600a8c29ea 100644 --- a/test/CodeGen/ARM/iabs.ll +++ b/test/CodeGen/ARM/iabs.ll @@ -10,7 +10,25 @@ define i32 @test(i32 %a) { %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg ret i32 %abs -; CHECK: movs r0, r0 +; CHECK: cmp ; CHECK: rsbmi r0, r0, #0 ; CHECK: bx lr } + +; rdar://11633193 +;; 3 instructions will be generated for abs(a-b): +;; subs +;; rsbmi +;; bx +define i32 @test2(i32 %a, i32 %b) 
nounwind readnone ssp { +entry: +; CHECK: test2 +; CHECK: subs +; CHECK-NEXT: rsbmi +; CHECK-NEXT: bx + %sub = sub nsw i32 %a, %b + %cmp = icmp sgt i32 %sub, -1 + %sub1 = sub nsw i32 0, %sub + %cond = select i1 %cmp, i32 %sub, i32 %sub1 + ret i32 %cond +} diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll index 8130019cbf..0c8d387489 100644 --- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]} +; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]" ; Should use scaled addressing mode. define void @sintzero(i32* %a) nounwind { diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll index de48feeb9e..4a82c36676 100644 --- a/test/CodeGen/ARM/neon_div.ll +++ b/test/CodeGen/ARM/neon_div.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 -;CHECK: vrecpe.f32 ;CHECK: vmovn.i32 +;CHECK: vrecpe.f32 ;CHECK: vmovn.i32 ;CHECK: vmovn.i16 %tmp1 = load <8 x i8>* %A @@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 ;CHECK: vrecps.f32 +;CHECK: vmovn.i32 ;CHECK: vrecpe.f32 ;CHECK: vrecps.f32 ;CHECK: vmovn.i32 -;CHECK: vmovn.i32 ;CHECK: vqmovun.s16 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll new file mode 100644 index 0000000000..4bc8810354 --- /dev/null +++ b/test/CodeGen/ARM/pr13249.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple armv7--linux-gnueabi + +define arm_aapcscc i8* @__strtok_r_1c(i8* %arg, i8 signext %arg1, i8** nocapture %arg2) nounwind { +bb: + br label %bb3 + +bb3: ; preds = %bb3, %bb + 
%tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ] + %tmp4 = load i8* %tmp, align 1 + %tmp5 = getelementptr inbounds i8* %tmp, i32 1 + br i1 undef, label %bb3, label %bb7 + +bb7: ; preds = %bb13, %bb3 + %tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ] + %tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ] + %tmp10 = icmp ne i8 %tmp8, %arg1 + %tmp12 = getelementptr inbounds i8* %tmp9, i32 1 + br i1 %tmp10, label %bb13, label %bb15 + +bb13: ; preds = %bb7 + %tmp14 = load i8* %tmp12, align 1 + br label %bb7 + +bb15: ; preds = %bb7 + store i8* %tmp9, i8** %arg2, align 4 + ret i8* %tmp +} diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll index e56e3f253e..d8b3f0e767 100644 --- a/test/CodeGen/ARM/str_pre.ll +++ b/test/CodeGen/ARM/str_pre.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm | \ -; RUN: grep {str.*\\!} | count 2 +; RUN: grep "str.*\!" | count 2 define void @test1(i32* %X, i32* %A, i32** %dest) { %B = load i32* %A ; <i32> [#uses=1] diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll index 0c2f7398cb..99ba475ad7 100644 --- a/test/CodeGen/ARM/struct_byval.ll +++ b/test/CodeGen/ARM/struct_byval.ll @@ -28,5 +28,19 @@ entry: ret i32 0 } +; Generate a loop using NEON instructions +define i32 @h() nounwind ssp { +entry: +; CHECK: h: +; CHECK: vld1 +; CHECK: sub +; CHECK: vst1 +; CHECK: bne + %st = alloca %struct.LargeStruct, align 16 + %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st) + ret i32 0 +} + declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind +declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll index 06ea703fc7..474043afc1 100644 --- a/test/CodeGen/ARM/sub.ll +++ b/test/CodeGen/ARM/sub.ll @@ -36,3 +36,15 @@ entry: %sel = select i1 %cmp, i32 1, i32 %sub ret i32 %sel } + +; rdar://11726136 +define i32 @f5(i32 %x) { +entry: +; CHECK: f5 +; 
CHECK: movw r1, #65535 +; CHECK-NOT: movt +; CHECK-NOT: add +; CHECK: sub r0, r0, r1 + %sub = add i32 %x, -65535 + ret i32 %sub +} diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll index 3143387ead..c403fa5c4a 100644 --- a/test/CodeGen/ARM/thread_pointer.ll +++ b/test/CodeGen/ARM/thread_pointer.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {__aeabi_read_tp} +; RUN: grep "__aeabi_read_tp" define i8* @test() { entry: diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll index 28fd469653..a25352c0f0 100644 --- a/test/CodeGen/ARM/thumb2-it-block.ll +++ b/test/CodeGen/ARM/thumb2-it-block.ll @@ -3,10 +3,10 @@ define i32 @test(i32 %a, i32 %b) { entry: -; CHECK: movs.w +; CHECK: cmp ; CHECK-NEXT: it mi ; CHECK-NEXT: rsbmi -; CHECK-NEXT: movs.w +; CHECK-NEXT: cmp ; CHECK-NEXT: it mi ; CHECK-NEXT: rsbmi %cmp1 = icmp slt i32 %a, 0 diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll new file mode 100644 index 0000000000..a5f3c9005a --- /dev/null +++ b/test/CodeGen/ARM/tls-models.ll @@ -0,0 +1,117 @@ +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s +; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s + + +@external_gd = external thread_local global i32 +@internal_gd = internal thread_local global i32 42 + +@external_ld = external thread_local(localdynamic) global i32 +@internal_ld = internal thread_local(localdynamic) global i32 42 + +@external_ie = external thread_local(initialexec) global i32 +@internal_ie = internal thread_local(initialexec) global i32 42 + +@external_le = external thread_local(localexec) global i32 +@internal_le = internal thread_local(localexec) global i32 42 + +; ----- no model specified ----- + +define i32* @f1() { +entry: + ret i32* @external_gd + + ; Non-PIC code can use initial-exec, PIC code has to use 
general dynamic. + ; CHECK-NONPIC: f1: + ; CHECK-NONPIC: external_gd(gottpoff) + ; CHECK-PIC: f1: + ; CHECK-PIC: external_gd(tlsgd) +} + +define i32* @f2() { +entry: + ret i32* @internal_gd + + ; Non-PIC code can use local exec, PIC code can use local dynamic, + ; but that is not implemented, so falls back to general dynamic. + ; CHECK-NONPIC: f2: + ; CHECK-NONPIC: internal_gd(tpoff) + ; CHECK-PIC: f2: + ; CHECK-PIC: internal_gd(tlsgd) +} + + +; ----- localdynamic specified ----- + +define i32* @f3() { +entry: + ret i32* @external_ld + + ; Non-PIC code can use initial exec, PIC should use local dynamic, + ; but that is not implemented, so falls back to general dynamic. + ; CHECK-NONPIC: f3: + ; CHECK-NONPIC: external_ld(gottpoff) + ; CHECK-PIC: f3: + ; CHECK-PIC: external_ld(tlsgd) +} + +define i32* @f4() { +entry: + ret i32* @internal_ld + + ; Non-PIC code can use local exec, PIC code can use local dynamic, + ; but that is not implemented, so it falls back to general dynamic. + ; CHECK-NONPIC: f4: + ; CHECK-NONPIC: internal_ld(tpoff) + ; CHECK-PIC: f4: + ; CHECK-PIC: internal_ld(tlsgd) +} + + +; ----- initialexec specified ----- + +define i32* @f5() { +entry: + ret i32* @external_ie + + ; Non-PIC and PIC code will use initial exec as specified. + ; CHECK-NONPIC: f5: + ; CHECK-NONPIC: external_ie(gottpoff) + ; CHECK-PIC: f5: + ; CHECK-PIC: external_ie(gottpoff) +} + +define i32* @f6() { +entry: + ret i32* @internal_ie + + ; Non-PIC code can use local exec, PIC code use initial exec as specified. + ; CHECK-NONPIC: f6: + ; CHECK-NONPIC: internal_ie(tpoff) + ; CHECK-PIC: f6: + ; CHECK-PIC: internal_ie(gottpoff) +} + + +; ----- localexec specified ----- + +define i32* @f7() { +entry: + ret i32* @external_le + + ; Non-PIC and PIC code will use local exec as specified. 
+ ; CHECK-NONPIC: f7: + ; CHECK-NONPIC: external_le(tpoff) + ; CHECK-PIC: f7: + ; CHECK-PIC: external_le(tpoff) +} + +define i32* @f8() { +entry: + ret i32* @internal_le + + ; Non-PIC and PIC code will use local exec as specified. + ; CHECK-NONPIC: f8: + ; CHECK-NONPIC: internal_le(tpoff) + ; CHECK-PIC: f8: + ; CHECK-PIC: internal_le(tpoff) +} diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll index 1087094e57..ec4278ce72 100644 --- a/test/CodeGen/ARM/tls1.ll +++ b/test/CodeGen/ARM/tls1.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {i(tpoff)} +; RUN: grep "i(tpoff)" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {__aeabi_read_tp} +; RUN: grep "__aeabi_read_tp" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ -; RUN: -relocation-model=pic | grep {__tls_get_addr} +; RUN: -relocation-model=pic | grep "__tls_get_addr" @i = thread_local global i32 15 ; <i32*> [#uses=2] diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll index df7a4ca02d..e0e944f70c 100644 --- a/test/CodeGen/ARM/tls3.ll +++ b/test/CodeGen/ARM/tls3.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep {tbss} +; RUN: grep "tbss" %struct.anon = type { i32, i32 } @teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1] diff --git a/test/CodeGen/CellSPU/icmp16.ll b/test/CodeGen/CellSPU/icmp16.ll index 32b12617cf..2f9b091fae 100644 --- a/test/CodeGen/CellSPU/icmp16.ll +++ b/test/CodeGen/CellSPU/icmp16.ll @@ -1,14 +1,4 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ilh %t1.s | count 15 -; RUN: grep ceqh %t1.s | count 29 -; RUN: grep ceqhi %t1.s | count 13 -; RUN: grep clgth %t1.s | count 15 -; RUN: grep cgth %t1.s | count 14 -; RUN: grep cgthi %t1.s | count 6 -; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, 
\\\$3} %t1.s | count 17 -; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 6 +; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -27,6 +17,10 @@ target triple = "spu" ; i16 integer comparisons: define i16 @icmp_eq_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_select_i16: +; CHECK: ceqh +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp eq i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -34,12 +28,22 @@ entry: } define i1 @icmp_eq_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_setcc_i16: +; CHECK: ilhu +; CHECK: ceqh +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp eq i16 %arg1, %arg2 ret i1 %A } define i16 @icmp_eq_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_immed01_i16: +; CHECK: ceqhi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i16 %arg1, 511 %B = select i1 %A, i16 %val1, i16 %val2 @@ -47,6 +51,10 @@ entry: } define i16 @icmp_eq_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_immed02_i16: +; CHECK: ceqhi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i16 %arg1, -512 %B = select i1 %A, i16 %val1, i16 %val2 @@ -54,6 +62,10 @@ entry: } define i16 @icmp_eq_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_immed03_i16: +; CHECK: ceqhi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i16 %arg1, -1 %B = select i1 %A, i16 %val1, i16 %val2 @@ -61,6 +73,11 @@ entry: } define i16 @icmp_eq_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_eq_immed04_i16: +; CHECK: ilh +; CHECK: ceqh +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i16 %arg1, 32768 %B = select i1 %A, i16 %val1, i16 %val2 @@ -68,6 +85,10 @@ entry: } define i16 @icmp_ne_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; 
CHECK: icmp_ne_select_i16: +; CHECK: ceqh +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ne i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -75,12 +96,23 @@ entry: } define i1 @icmp_ne_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ne_setcc_i16: +; CHECK: ceqh +; CHECK: ilhu +; CHECK: xorhi +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ne i16 %arg1, %arg2 ret i1 %A } define i16 @icmp_ne_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ne_immed01_i16: +; CHECK: ceqhi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i16 %arg1, 511 %B = select i1 %A, i16 %val1, i16 %val2 @@ -88,6 +120,10 @@ entry: } define i16 @icmp_ne_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ne_immed02_i16: +; CHECK: ceqhi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i16 %arg1, -512 %B = select i1 %A, i16 %val1, i16 %val2 @@ -95,6 +131,10 @@ entry: } define i16 @icmp_ne_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ne_immed03_i16: +; CHECK: ceqhi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i16 %arg1, -1 %B = select i1 %A, i16 %val1, i16 %val2 @@ -102,6 +142,11 @@ entry: } define i16 @icmp_ne_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ne_immed04_i16: +; CHECK: ilh +; CHECK: ceqh +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i16 %arg1, 32768 %B = select i1 %A, i16 %val1, i16 %val2 @@ -109,6 +154,10 @@ entry: } define i16 @icmp_ugt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_select_i16: +; CHECK: clgth +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ugt i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -116,12 +165,22 @@ entry: } define i1 @icmp_ugt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_setcc_i16: +; CHECK: ilhu +; CHECK: clgth +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ugt i16 %arg1, %arg2 ret i1 %A } define i16 
@icmp_ugt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_immed01_i16: +; CHECK: clgthi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i16 %arg1, 500 %B = select i1 %A, i16 %val1, i16 %val2 @@ -129,6 +188,10 @@ entry: } define i16 @icmp_ugt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_immed02_i16: +; CHECK: ceqhi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ugt i16 %arg1, 0 %B = select i1 %A, i16 %val1, i16 %val2 @@ -136,6 +199,10 @@ entry: } define i16 @icmp_ugt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_immed03_i16: +; CHECK: clgthi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i16 %arg1, 65024 %B = select i1 %A, i16 %val1, i16 %val2 @@ -143,6 +210,11 @@ entry: } define i16 @icmp_ugt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ugt_immed04_i16: +; CHECK: ilh +; CHECK: clgth +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i16 %arg1, 32768 %B = select i1 %A, i16 %val1, i16 %val2 @@ -150,6 +222,12 @@ entry: } define i16 @icmp_uge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_uge_select_i16: +; CHECK: ceqh +; CHECK: clgth +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp uge i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -157,6 +235,14 @@ entry: } define i1 @icmp_uge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_uge_setcc_i16: +; CHECK: ceqh +; CHECK: clgth +; CHECK: ilhu +; CHECK: or +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp uge i16 %arg1, %arg2 ret i1 %A @@ -169,6 +255,12 @@ entry: ;; they'll ever be generated. 
define i16 @icmp_ult_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_select_i16: +; CHECK: ceqh +; CHECK: clgth +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ult i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -176,12 +268,26 @@ entry: } define i1 @icmp_ult_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_setcc_i16: +; CHECK: ceqh +; CHECK: clgth +; CHECK: ilhu +; CHECK: nor +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ult i16 %arg1, %arg2 ret i1 %A } define i16 @icmp_ult_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_immed01_i16: +; CHECK: ceqhi +; CHECK: clgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i16 %arg1, 511 %B = select i1 %A, i16 %val1, i16 %val2 @@ -189,6 +295,12 @@ entry: } define i16 @icmp_ult_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_immed02_i16: +; CHECK: ceqhi +; CHECK: clgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i16 %arg1, 65534 %B = select i1 %A, i16 %val1, i16 %val2 @@ -196,6 +308,12 @@ entry: } define i16 @icmp_ult_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_immed03_i16: +; CHECK: ceqhi +; CHECK: clgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i16 %arg1, 65024 %B = select i1 %A, i16 %val1, i16 %val2 @@ -203,6 +321,13 @@ entry: } define i16 @icmp_ult_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ult_immed04_i16: +; CHECK: ilh +; CHECK: ceqh +; CHECK: clgth +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i16 %arg1, 32769 %B = select i1 %A, i16 %val1, i16 %val2 @@ -210,6 +335,10 @@ entry: } define i16 @icmp_ule_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ule_select_i16: +; CHECK: clgth +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ule i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 
@@ -217,6 +346,13 @@ entry: } define i1 @icmp_ule_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_ule_setcc_i16: +; CHECK: clgth +; CHECK: ilhu +; CHECK: xorhi +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ule i16 %arg1, %arg2 ret i1 %A @@ -229,6 +365,10 @@ entry: ;; they'll ever be generated. define i16 @icmp_sgt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_select_i16: +; CHECK: cgth +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sgt i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -236,12 +376,22 @@ entry: } define i1 @icmp_sgt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_setcc_i16: +; CHECK: ilhu +; CHECK: cgth +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp sgt i16 %arg1, %arg2 ret i1 %A } define i16 @icmp_sgt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_immed01_i16: +; CHECK: cgthi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i16 %arg1, 511 %B = select i1 %A, i16 %val1, i16 %val2 @@ -249,6 +399,10 @@ entry: } define i16 @icmp_sgt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_immed02_i16: +; CHECK: cgthi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i16 %arg1, -1 %B = select i1 %A, i16 %val1, i16 %val2 @@ -256,6 +410,10 @@ entry: } define i16 @icmp_sgt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_immed03_i16: +; CHECK: cgthi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i16 %arg1, -512 %B = select i1 %A, i16 %val1, i16 %val2 @@ -263,6 +421,11 @@ entry: } define i16 @icmp_sgt_immed04_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sgt_immed04_i16: +; CHECK: ilh +; CHECK: ceqh +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp sgt i16 %arg1, 32768 %B = select i1 %A, i16 %val1, i16 %val2 @@ -270,6 +433,12 @@ entry: } define i16 @icmp_sge_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: 
icmp_sge_select_i16: +; CHECK: ceqh +; CHECK: cgth +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sge i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -277,6 +446,14 @@ entry: } define i1 @icmp_sge_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sge_setcc_i16: +; CHECK: ceqh +; CHECK: cgth +; CHECK: ilhu +; CHECK: or +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp sge i16 %arg1, %arg2 ret i1 %A @@ -289,6 +466,12 @@ entry: ;; they'll ever be generated. define i16 @icmp_slt_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_select_i16: +; CHECK: ceqh +; CHECK: cgth +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp slt i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -296,12 +479,26 @@ entry: } define i1 @icmp_slt_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_setcc_i16: +; CHECK: ceqh +; CHECK: cgth +; CHECK: ilhu +; CHECK: nor +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp slt i16 %arg1, %arg2 ret i1 %A } define i16 @icmp_slt_immed01_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_immed01_i16: +; CHECK: ceqhi +; CHECK: cgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i16 %arg1, 511 %B = select i1 %A, i16 %val1, i16 %val2 @@ -309,6 +506,12 @@ entry: } define i16 @icmp_slt_immed02_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_immed02_i16: +; CHECK: ceqhi +; CHECK: cgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i16 %arg1, -512 %B = select i1 %A, i16 %val1, i16 %val2 @@ -316,6 +519,12 @@ entry: } define i16 @icmp_slt_immed03_i16(i16 %arg1, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_immed03_i16: +; CHECK: ceqhi +; CHECK: cgthi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i16 %arg1, -1 %B = select i1 %A, i16 %val1, i16 %val2 @@ -323,6 +532,10 @@ entry: } define i16 @icmp_slt_immed04_i16(i16 %arg1, 
i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_slt_immed04_i16: +; CHECK: lr +; CHECK-NETX: bi + entry: %A = icmp slt i16 %arg1, 32768 %B = select i1 %A, i16 %val1, i16 %val2 @@ -330,6 +543,10 @@ entry: } define i16 @icmp_sle_select_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sle_select_i16: +; CHECK: cgth +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp sle i16 %arg1, %arg2 %B = select i1 %A, i16 %val1, i16 %val2 @@ -337,6 +554,13 @@ entry: } define i1 @icmp_sle_setcc_i16(i16 %arg1, i16 %arg2, i16 %val1, i16 %val2) nounwind { +; CHECK: icmp_sle_setcc_i16: +; CHECK: cgth +; CHECK: ilhu +; CHECK: xorhi +; CHECK: iohl +; CHECK-NETX: bi + entry: %A = icmp sle i16 %arg1, %arg2 ret i1 %A diff --git a/test/CodeGen/CellSPU/icmp32.ll b/test/CodeGen/CellSPU/icmp32.ll index ccbb5f7cde..ea912847e8 100644 --- a/test/CodeGen/CellSPU/icmp32.ll +++ b/test/CodeGen/CellSPU/icmp32.ll @@ -1,14 +1,4 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ila %t1.s | count 6 -; RUN: grep ceq %t1.s | count 28 -; RUN: grep ceqi %t1.s | count 12 -; RUN: grep clgt %t1.s | count 16 -; RUN: grep clgti %t1.s | count 6 -; RUN: grep cgt %t1.s | count 16 -; RUN: grep cgti %t1.s | count 6 -; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 20 +; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -27,6 +17,10 @@ target triple = "spu" ; i32 integer comparisons: define i32 @icmp_eq_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_select_i32: +; CHECK: ceq +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp eq i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -34,12 +28,22 @@ entry: } define i1 @icmp_eq_setcc_i32(i32 %arg1, i32 %arg2, 
i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_setcc_i32: +; CHECK: ilhu +; CHECK: ceq +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp eq i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_eq_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_immed01_i32: +; CHECK: ceqi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -47,6 +51,10 @@ entry: } define i32 @icmp_eq_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_immed02_i32: +; CHECK: ceqi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i32 %arg1, -512 %B = select i1 %A, i32 %val1, i32 %val2 @@ -54,6 +62,10 @@ entry: } define i32 @icmp_eq_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_immed03_i32: +; CHECK: ceqi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i32 %arg1, -1 %B = select i1 %A, i32 %val1, i32 %val2 @@ -61,6 +73,11 @@ entry: } define i32 @icmp_eq_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_eq_immed04_i32: +; CHECK: ila +; CHECK: ceq +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -68,6 +85,10 @@ entry: } define i32 @icmp_ne_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_select_i32: +; CHECK: ceq +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ne i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -75,12 +96,23 @@ entry: } define i1 @icmp_ne_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_setcc_i32: +; CHECK: ceq +; CHECK: ilhu +; CHECK: xori +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ne i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_ne_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_immed01_i32: +; CHECK: ceqi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -88,6 +120,10 @@ entry: } define i32 
@icmp_ne_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_immed02_i32: +; CHECK: ceqi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i32 %arg1, -512 %B = select i1 %A, i32 %val1, i32 %val2 @@ -95,6 +131,10 @@ entry: } define i32 @icmp_ne_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_immed03_i32: +; CHECK: ceqi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i32 %arg1, -1 %B = select i1 %A, i32 %val1, i32 %val2 @@ -102,6 +142,11 @@ entry: } define i32 @icmp_ne_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ne_immed04_i32: +; CHECK: ila +; CHECK: ceq +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -109,6 +154,10 @@ entry: } define i32 @icmp_ugt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_select_i32: +; CHECK: clgt +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ugt i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -116,12 +165,22 @@ entry: } define i1 @icmp_ugt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_setcc_i32: +; CHECK: ilhu +; CHECK: clgt +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ugt i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_ugt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_immed01_i32: +; CHECK: clgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -129,6 +188,10 @@ entry: } define i32 @icmp_ugt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_immed02_i32: +; CHECK: clgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i32 %arg1, 4294966784 %B = select i1 %A, i32 %val1, i32 %val2 @@ -136,6 +199,10 @@ entry: } define i32 @icmp_ugt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_immed03_i32: +; CHECK: clgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i32 
%arg1, 4294967293 %B = select i1 %A, i32 %val1, i32 %val2 @@ -143,6 +210,11 @@ entry: } define i32 @icmp_ugt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ugt_immed04_i32: +; CHECK: ila +; CHECK: clgt +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -150,6 +222,12 @@ entry: } define i32 @icmp_uge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_uge_select_i32: +; CHECK: ceq +; CHECK: clgt +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp uge i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -157,6 +235,14 @@ entry: } define i1 @icmp_uge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_uge_setcc_i32: +; CHECK: ceq +; CHECK: clgt +; CHECK: ilhu +; CHECK: or +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp uge i32 %arg1, %arg2 ret i1 %A @@ -169,6 +255,12 @@ entry: ;; they'll ever be generated. define i32 @icmp_ult_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_select_i32: +; CHECK: ceq +; CHECK: clgt +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ult i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -176,12 +268,26 @@ entry: } define i1 @icmp_ult_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_setcc_i32: +; CHECK: ceq +; CHECK: clgt +; CHECK: ilhu +; CHECK: nor +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ult i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_ult_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_immed01_i32: +; CHECK: ceqi +; CHECK: clgti +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -189,6 +295,12 @@ entry: } define i32 @icmp_ult_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_immed02_i32: +; CHECK: ceqi +; CHECK: clgti +; CHECK: nor +; CHECK: selb $3, 
$5, $4, $3 + entry: %A = icmp ult i32 %arg1, 4294966784 %B = select i1 %A, i32 %val1, i32 %val2 @@ -196,6 +308,12 @@ entry: } define i32 @icmp_ult_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_immed03_i32: +; CHECK: ceqi +; CHECK: clgti +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i32 %arg1, 4294967293 %B = select i1 %A, i32 %val1, i32 %val2 @@ -203,6 +321,13 @@ entry: } define i32 @icmp_ult_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ult_immed04_i32: +; CHECK: ila +; CHECK: ceq +; CHECK: clgt +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -210,6 +335,10 @@ entry: } define i32 @icmp_ule_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ule_select_i32: +; CHECK: clgt +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ule i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -217,6 +346,13 @@ entry: } define i1 @icmp_ule_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_ule_setcc_i32: +; CHECK: clgt +; CHECK: ilhu +; CHECK: xori +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp ule i32 %arg1, %arg2 ret i1 %A @@ -229,6 +365,10 @@ entry: ;; they'll ever be generated. 
define i32 @icmp_sgt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_select_i32: +; CHECK: cgt +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sgt i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -236,12 +376,22 @@ entry: } define i1 @icmp_sgt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_setcc_i32: +; CHECK: ilhu +; CHECK: cgt +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp sgt i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_sgt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_immed01_i32: +; CHECK: cgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -249,6 +399,10 @@ entry: } define i32 @icmp_sgt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_immed02_i32: +; CHECK: cgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i32 %arg1, 4294966784 %B = select i1 %A, i32 %val1, i32 %val2 @@ -256,6 +410,10 @@ entry: } define i32 @icmp_sgt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_immed03_i32: +; CHECK: cgti +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i32 %arg1, 4294967293 %B = select i1 %A, i32 %val1, i32 %val2 @@ -263,6 +421,11 @@ entry: } define i32 @icmp_sgt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sgt_immed04_i32: +; CHECK: ila +; CHECK: cgt +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -270,6 +433,12 @@ entry: } define i32 @icmp_sge_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sge_select_i32: +; CHECK: ceq +; CHECK: cgt +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sge i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -277,6 +446,14 @@ entry: } define i1 @icmp_sge_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sge_setcc_i32: +; 
CHECK: ceq +; CHECK: cgt +; CHECK: ilhu +; CHECK: or +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp sge i32 %arg1, %arg2 ret i1 %A @@ -289,6 +466,12 @@ entry: ;; they'll ever be generated. define i32 @icmp_slt_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_select_i32: +; CHECK: ceq +; CHECK: cgt +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp slt i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -296,12 +479,26 @@ entry: } define i1 @icmp_slt_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_setcc_i32: +; CHECK: ceq +; CHECK: cgt +; CHECK: ilhu +; CHECK: nor +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp slt i32 %arg1, %arg2 ret i1 %A } define i32 @icmp_slt_immed01_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_immed01_i32: +; CHECK: ceqi +; CHECK: cgti +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i32 %arg1, 511 %B = select i1 %A, i32 %val1, i32 %val2 @@ -309,6 +506,12 @@ entry: } define i32 @icmp_slt_immed02_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_immed02_i32: +; CHECK: ceqi +; CHECK: cgti +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i32 %arg1, -512 %B = select i1 %A, i32 %val1, i32 %val2 @@ -316,6 +519,12 @@ entry: } define i32 @icmp_slt_immed03_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_immed03_i32: +; CHECK: ceqi +; CHECK: cgti +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i32 %arg1, -1 %B = select i1 %A, i32 %val1, i32 %val2 @@ -323,6 +532,13 @@ entry: } define i32 @icmp_slt_immed04_i32(i32 %arg1, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_slt_immed04_i32: +; CHECK: ila +; CHECK: ceq +; CHECK: cgt +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i32 %arg1, 32768 %B = select i1 %A, i32 %val1, i32 %val2 @@ -330,6 +546,10 @@ entry: } define i32 @icmp_sle_select_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 
%val2) nounwind { +; CHECK: icmp_sle_select_i32: +; CHECK: cgt +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp sle i32 %arg1, %arg2 %B = select i1 %A, i32 %val1, i32 %val2 @@ -337,6 +557,13 @@ entry: } define i1 @icmp_sle_setcc_i32(i32 %arg1, i32 %arg2, i32 %val1, i32 %val2) nounwind { +; CHECK: icmp_sle_setcc_i32: +; CHECK: cgt +; CHECK: ilhu +; CHECK: xori +; CHECK: iohl +; CHECK: shufb + entry: %A = icmp sle i32 %arg1, %arg2 ret i1 %A diff --git a/test/CodeGen/CellSPU/icmp8.ll b/test/CodeGen/CellSPU/icmp8.ll index 5517d104ab..1db641e5a8 100644 --- a/test/CodeGen/CellSPU/icmp8.ll +++ b/test/CodeGen/CellSPU/icmp8.ll @@ -1,13 +1,4 @@ -; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep ceqb %t1.s | count 24 -; RUN: grep ceqbi %t1.s | count 12 -; RUN: grep clgtb %t1.s | count 11 -; RUN: grep cgtb %t1.s | count 13 -; RUN: grep cgtbi %t1.s | count 5 -; RUN: grep {selb\t\\\$3, \\\$6, \\\$5, \\\$3} %t1.s | count 7 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$6, \\\$3} %t1.s | count 3 -; RUN: grep {selb\t\\\$3, \\\$5, \\\$4, \\\$3} %t1.s | count 11 -; RUN: grep {selb\t\\\$3, \\\$4, \\\$5, \\\$3} %t1.s | count 4 +; RUN: llc < %s -march=cellspu | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" @@ -26,6 +17,10 @@ target triple = "spu" ; i8 integer comparisons: define i8 @icmp_eq_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_eq_select_i8: +; CHECK: ceqb +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp eq i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -33,12 +28,20 @@ entry: } define i1 @icmp_eq_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_eq_setcc_i8: +; CHECK: ceqb +; CHECK-NEXT: bi + entry: %A = icmp eq i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_eq_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_eq_immed01_i8: +; CHECK: ceqbi +; CHECK: selb $3, $5, $4, $3 + entry: 
%A = icmp eq i8 %arg1, 127 %B = select i1 %A, i8 %val1, i8 %val2 @@ -46,6 +49,10 @@ entry: } define i8 @icmp_eq_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_eq_immed02_i8: +; CHECK: ceqbi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i8 %arg1, -128 %B = select i1 %A, i8 %val1, i8 %val2 @@ -53,6 +60,10 @@ entry: } define i8 @icmp_eq_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_eq_immed03_i8: +; CHECK: ceqbi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp eq i8 %arg1, -1 %B = select i1 %A, i8 %val1, i8 %val2 @@ -60,6 +71,10 @@ entry: } define i8 @icmp_ne_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ne_select_i8: +; CHECK: ceqb +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ne i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -67,12 +82,21 @@ entry: } define i1 @icmp_ne_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ne_setcc_i8: +; CHECK: ceqb +; CHECK: xorbi +; CHECK-NEXT: bi + entry: %A = icmp ne i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_ne_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ne_immed01_i8: +; CHECK: ceqbi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i8 %arg1, 127 %B = select i1 %A, i8 %val1, i8 %val2 @@ -80,6 +104,10 @@ entry: } define i8 @icmp_ne_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ne_immed02_i8: +; CHECK: ceqbi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i8 %arg1, -128 %B = select i1 %A, i8 %val1, i8 %val2 @@ -87,6 +115,10 @@ entry: } define i8 @icmp_ne_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ne_immed03_i8: +; CHECK: ceqbi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp ne i8 %arg1, -1 %B = select i1 %A, i8 %val1, i8 %val2 @@ -94,6 +126,10 @@ entry: } define i8 @icmp_ugt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ugt_select_i8: +; CHECK: clgtb +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ugt i8 %arg1, 
%arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -101,12 +137,20 @@ entry: } define i1 @icmp_ugt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ugt_setcc_i8: +; CHECK: clgtb +; CHECK-NEXT: bi + entry: %A = icmp ugt i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_ugt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ugt_immed01_i8: +; CHECK: clgtbi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ugt i8 %arg1, 126 %B = select i1 %A, i8 %val1, i8 %val2 @@ -114,6 +158,12 @@ entry: } define i8 @icmp_uge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_uge_select_i8: +; CHECK: ceqb +; CHECK: clgtb +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp uge i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -121,6 +171,12 @@ entry: } define i1 @icmp_uge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_uge_setcc_i8: +; CHECK: ceqb +; CHECK: clgtb +; CHECK: or +; CHECK-NEXT: bi + entry: %A = icmp uge i8 %arg1, %arg2 ret i1 %A @@ -133,6 +189,12 @@ entry: ;; they'll ever be generated. 
define i8 @icmp_ult_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ult_select_i8: +; CHECK: ceqb +; CHECK: clgtb +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp ult i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -140,12 +202,24 @@ entry: } define i1 @icmp_ult_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ult_setcc_i8: +; CHECK: ceqb +; CHECK: clgtb +; CHECK: nor +; CHECK-NEXT: bi + entry: %A = icmp ult i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_ult_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ult_immed01_i8: +; CHECK: ceqbi +; CHECK: clgtbi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i8 %arg1, 253 %B = select i1 %A, i8 %val1, i8 %val2 @@ -153,6 +227,12 @@ entry: } define i8 @icmp_ult_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ult_immed02_i8: +; CHECK: ceqbi +; CHECK: clgtbi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp ult i8 %arg1, 129 %B = select i1 %A, i8 %val1, i8 %val2 @@ -160,6 +240,10 @@ entry: } define i8 @icmp_ule_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ule_select_i8: +; CHECK: clgtb +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp ule i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -167,6 +251,11 @@ entry: } define i1 @icmp_ule_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_ule_setcc_i8: +; CHECK: clgtb +; CHECK: xorbi +; CHECK-NEXT: bi + entry: %A = icmp ule i8 %arg1, %arg2 ret i1 %A @@ -179,6 +268,10 @@ entry: ;; they'll ever be generated. 
define i8 @icmp_sgt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sgt_select_i8: +; CHECK: cgtb +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sgt i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -186,12 +279,20 @@ entry: } define i1 @icmp_sgt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sgt_setcc_i8: +; CHECK: cgtb +; CHECK-NEXT: bi + entry: %A = icmp sgt i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_sgt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sgt_immed01_i8: +; CHECK: cgtbi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i8 %arg1, 96 %B = select i1 %A, i8 %val1, i8 %val2 @@ -199,6 +300,10 @@ entry: } define i8 @icmp_sgt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sgt_immed02_i8: +; CHECK: cgtbi +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp sgt i8 %arg1, -1 %B = select i1 %A, i8 %val1, i8 %val2 @@ -206,6 +311,10 @@ entry: } define i8 @icmp_sgt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sgt_immed03_i8: +; CHECK: ceqbi +; CHECK: selb $3, $4, $5, $3 + entry: %A = icmp sgt i8 %arg1, -128 %B = select i1 %A, i8 %val1, i8 %val2 @@ -213,6 +322,12 @@ entry: } define i8 @icmp_sge_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sge_select_i8: +; CHECK: ceqb +; CHECK: cgtb +; CHECK: or +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp sge i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -220,6 +335,12 @@ entry: } define i1 @icmp_sge_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sge_setcc_i8: +; CHECK: ceqb +; CHECK: cgtb +; CHECK: or +; CHECK-NEXT: bi + entry: %A = icmp sge i8 %arg1, %arg2 ret i1 %A @@ -232,6 +353,12 @@ entry: ;; they'll ever be generated. 
define i8 @icmp_slt_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_slt_select_i8: +; CHECK: ceqb +; CHECK: cgtb +; CHECK: nor +; CHECK: selb $3, $6, $5, $3 + entry: %A = icmp slt i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -239,12 +366,24 @@ entry: } define i1 @icmp_slt_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_slt_setcc_i8: +; CHECK: ceqb +; CHECK: cgtb +; CHECK: nor +; CHECK-NEXT: bi + entry: %A = icmp slt i8 %arg1, %arg2 ret i1 %A } define i8 @icmp_slt_immed01_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_slt_immed01_i8: +; CHECK: ceqbi +; CHECK: cgtbi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i8 %arg1, 96 %B = select i1 %A, i8 %val1, i8 %val2 @@ -252,6 +391,12 @@ entry: } define i8 @icmp_slt_immed02_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_slt_immed02_i8: +; CHECK: ceqbi +; CHECK: cgtbi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i8 %arg1, -120 %B = select i1 %A, i8 %val1, i8 %val2 @@ -259,6 +404,12 @@ entry: } define i8 @icmp_slt_immed03_i8(i8 %arg1, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_slt_immed03_i8: +; CHECK: ceqbi +; CHECK: cgtbi +; CHECK: nor +; CHECK: selb $3, $5, $4, $3 + entry: %A = icmp slt i8 %arg1, -1 %B = select i1 %A, i8 %val1, i8 %val2 @@ -266,6 +417,10 @@ entry: } define i8 @icmp_sle_select_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sle_select_i8: +; CHECK: cgtb +; CHECK: selb $3, $5, $6, $3 + entry: %A = icmp sle i8 %arg1, %arg2 %B = select i1 %A, i8 %val1, i8 %val2 @@ -273,6 +428,11 @@ entry: } define i1 @icmp_sle_setcc_i8(i8 %arg1, i8 %arg2, i8 %val1, i8 %val2) nounwind { +; CHECK: icmp_sle_setcc_i8: +; CHECK: cgtb +; CHECK: xorbi +; CHECK-NEXT: bi + entry: %A = icmp sle i8 %arg1, %arg2 ret i1 %A diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index f4aad44ed6..1ccc356dcf 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ 
b/test/CodeGen/CellSPU/shift_ops.ll @@ -1,20 +1,20 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep {shlh } %t1.s | count 10 -; RUN: grep {shlhi } %t1.s | count 3 -; RUN: grep {shl } %t1.s | count 10 -; RUN: grep {shli } %t1.s | count 3 -; RUN: grep {xshw } %t1.s | count 5 -; RUN: grep {and } %t1.s | count 15 -; RUN: grep {andi } %t1.s | count 4 -; RUN: grep {rotmi } %t1.s | count 4 -; RUN: grep {rotqmbyi } %t1.s | count 1 -; RUN: grep {rotqmbii } %t1.s | count 2 -; RUN: grep {rotqmby } %t1.s | count 1 -; RUN: grep {rotqmbi } %t1.s | count 2 -; RUN: grep {rotqbyi } %t1.s | count 1 -; RUN: grep {rotqbii } %t1.s | count 2 -; RUN: grep {rotqbybi } %t1.s | count 1 -; RUN: grep {sfi } %t1.s | count 6 +; RUN: grep "shlh " %t1.s | count 10 +; RUN: grep "shlhi " %t1.s | count 3 +; RUN: grep "shl " %t1.s | count 10 +; RUN: grep "shli " %t1.s | count 3 +; RUN: grep "xshw " %t1.s | count 5 +; RUN: grep "and " %t1.s | count 15 +; RUN: grep "andi " %t1.s | count 4 +; RUN: grep "rotmi " %t1.s | count 4 +; RUN: grep "rotqmbyi " %t1.s | count 1 +; RUN: grep "rotqmbii " %t1.s | count 2 +; RUN: grep "rotqmby " %t1.s | count 1 +; RUN: grep "rotqmbi " %t1.s | count 2 +; RUN: grep "rotqbyi " %t1.s | count 1 +; RUN: grep "rotqbii " %t1.s | count 2 +; RUN: grep "rotqbybi " %t1.s | count 1 +; RUN: grep "sfi " %t1.s | count 6 ; RUN: cat %t1.s | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll index 6ca5b08923..43f8776a3d 100644 --- a/test/CodeGen/CellSPU/stores.ll +++ b/test/CodeGen/CellSPU/stores.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep {stqd.*0(\$3)} %t1.s | count 4 -; RUN: grep {stqd.*16(\$3)} %t1.s | count 4 +; RUN: grep 'stqd.*0($3)' %t1.s | count 4 +; RUN: grep 'stqd.*16($3)' %t1.s | count 4 ; RUN: grep 16256 %t1.s | count 2 ; RUN: grep 16384 %t1.s | count 1 ; RUN: 
grep 771 %t1.s | count 4 @@ -8,7 +8,7 @@ ; RUN: grep 1799 %t1.s | count 2 ; RUN: grep 1543 %t1.s | count 5 ; RUN: grep 1029 %t1.s | count 3 -; RUN: grep {shli.*, 4} %t1.s | count 4 +; RUN: grep 'shli.*, 4' %t1.s | count 4 ; RUN: grep stqx %t1.s | count 4 ; RUN: grep ilhu %t1.s | count 11 ; RUN: grep iohl %t1.s | count 8 diff --git a/test/CodeGen/CellSPU/trunc.ll b/test/CodeGen/CellSPU/trunc.ll index d16185238a..e4c8fb49a3 100644 --- a/test/CodeGen/CellSPU/trunc.ll +++ b/test/CodeGen/CellSPU/trunc.ll @@ -1,19 +1,19 @@ ; RUN: llc < %s -march=cellspu > %t1.s ; RUN: grep shufb %t1.s | count 19 -; RUN: grep {ilhu.*1799} %t1.s | count 1 -; RUN: grep {ilhu.*771} %t1.s | count 2 -; RUN: grep {ilhu.*1543} %t1.s | count 1 -; RUN: grep {ilhu.*1029} %t1.s | count 1 -; RUN: grep {ilhu.*515} %t1.s | count 1 -; RUN: grep {ilhu.*3855} %t1.s | count 1 -; RUN: grep {ilhu.*3599} %t1.s | count 1 -; RUN: grep {ilhu.*3085} %t1.s | count 1 -; RUN: grep {iohl.*3855} %t1.s | count 1 -; RUN: grep {iohl.*3599} %t1.s | count 2 -; RUN: grep {iohl.*1543} %t1.s | count 2 -; RUN: grep {iohl.*771} %t1.s | count 2 -; RUN: grep {iohl.*515} %t1.s | count 1 -; RUN: grep {iohl.*1799} %t1.s | count 1 +; RUN: grep "ilhu.*1799" %t1.s | count 1 +; RUN: grep "ilhu.*771" %t1.s | count 2 +; RUN: grep "ilhu.*1543" %t1.s | count 1 +; RUN: grep "ilhu.*1029" %t1.s | count 1 +; RUN: grep "ilhu.*515" %t1.s | count 1 +; RUN: grep "ilhu.*3855" %t1.s | count 1 +; RUN: grep "ilhu.*3599" %t1.s | count 1 +; RUN: grep "ilhu.*3085" %t1.s | count 1 +; RUN: grep "iohl.*3855" %t1.s | count 1 +; RUN: grep "iohl.*3599" %t1.s | count 2 +; RUN: grep "iohl.*1543" %t1.s | count 2 +; RUN: grep "iohl.*771" %t1.s | count 2 +; RUN: grep "iohl.*515" %t1.s | count 1 +; RUN: grep "iohl.*1799" %t1.s | count 1 ; RUN: grep lqa %t1.s | count 1 ; RUN: grep cbd %t1.s | count 4 ; RUN: grep chd %t1.s | count 3 diff --git a/test/CodeGen/Generic/APIntLoadStore.ll b/test/CodeGen/Generic/APIntLoadStore.ll index 2d71ece3eb..7c71a33fc3 100644 --- 
a/test/CodeGen/Generic/APIntLoadStore.ll +++ b/test/CodeGen/Generic/APIntLoadStore.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s > %t -; XFAIL: powerpc @i1_l = external global i1 ; <i1*> [#uses=1] @i1_s = external global i1 ; <i1*> [#uses=1] @i2_l = external global i2 ; <i2*> [#uses=1] diff --git a/test/CodeGen/Generic/asm-large-immediate.ll b/test/CodeGen/Generic/asm-large-immediate.ll index 605665bef6..891bbc9cc1 100644 --- a/test/CodeGen/Generic/asm-large-immediate.ll +++ b/test/CodeGen/Generic/asm-large-immediate.ll @@ -1,8 +1,10 @@ -; RUN: llc < %s | grep 68719476738 +; RUN: llc < %s | FileCheck %s define void @test() { entry: +; CHECK: /* result: 68719476738 */ tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) +; CHECK: /* result: -68719476738 */ + tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) ret void } - diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll new file mode 100644 index 0000000000..d6ba138fc6 --- /dev/null +++ b/test/CodeGen/Generic/donothing.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s + +declare i32 @__gxx_personality_v0(...) 
+declare void @__cxa_call_unexpected(i8*) +declare void @llvm.donothing() readnone + +; CHECK: f1 +define void @f1() nounwind uwtable ssp { +entry: +; CHECK-NOT donothing + invoke void @llvm.donothing() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret void + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } %0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +; CHECK: f2 +define void @f2() nounwind { +entry: +; CHECK-NOT donothing + call void @llvm.donothing() + ret void +} diff --git a/test/CodeGen/Generic/print-after.ll b/test/CodeGen/Generic/print-after.ll new file mode 100644 index 0000000000..7505907ef7 --- /dev/null +++ b/test/CodeGen/Generic/print-after.ll @@ -0,0 +1,6 @@ +; RUN: not llc --help-hidden 2>&1 | FileCheck %s + +; CHECK: -print-after +; CHECK-NOT: -print-after-all +; CHECK: =simple-register-coalescing +; CHECK: -print-after-all diff --git a/test/CodeGen/Generic/print-machineinstrs.ll b/test/CodeGen/Generic/print-machineinstrs.ll index 75b4cd14ff..75dceb5b26 100644 --- a/test/CodeGen/Generic/print-machineinstrs.ll +++ b/test/CodeGen/Generic/print-machineinstrs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs=branch-folder -o /dev/null |& FileCheck %s -; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs -o /dev/null |& FileCheck %s -; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs= -o /dev/null |& FileCheck %s +; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs=branch-folder -o /dev/null 2>&1 | FileCheck %s +; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs -o /dev/null 2>&1 | FileCheck %s +; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs= -o /dev/null 2>&1 | FileCheck %s define i64 @foo(i64 %a, i64 %b) nounwind { ; CHECK: -branch-folder -print-machineinstrs diff --git 
a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll new file mode 100644 index 0000000000..557e097840 --- /dev/null +++ b/test/CodeGen/Generic/stop-after.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -debug-pass=Structure -stop-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP +; RUN: llc < %s -debug-pass=Structure -start-after=loop-reduce -o /dev/null 2>&1 | FileCheck %s -check-prefix=START + +; STOP: -loop-reduce -print-module +; STOP: Loop Strength Reduction +; STOP-NEXT: Machine Function Analysis + +; START: -machine-branch-prob -gc-lowering +; START: FunctionPass Manager +; START-NEXT: Lower Garbage Collection Instructions diff --git a/test/CodeGen/Generic/undef-phi.ll b/test/CodeGen/Generic/undef-phi.ll new file mode 100644 index 0000000000..10899f9fa2 --- /dev/null +++ b/test/CodeGen/Generic/undef-phi.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -verify-machineinstrs -verify-coalescing +; +; This function has a PHI with one undefined input. Verify that PHIElimination +; inserts an IMPLICIT_DEF instruction in the predecessor so all paths to the use +; pass through a def. 
+ +%struct.xx_stack = type { i32, %struct.xx_stack* } + +define i32 @push(%struct.xx_stack* %stack) nounwind uwtable readonly ssp { +entry: + %tobool1 = icmp eq %struct.xx_stack* %stack, null + br i1 %tobool1, label %for.end, label %for.body + +for.body: + %stack.addr.02 = phi %struct.xx_stack* [ %0, %for.body ], [ %stack, %entry ] + %next = getelementptr inbounds %struct.xx_stack* %stack.addr.02, i64 0, i32 1 + %0 = load %struct.xx_stack** %next, align 8 + %tobool = icmp eq %struct.xx_stack* %0, null + br i1 %tobool, label %for.end, label %for.body + +for.end: + %top.0.lcssa = phi %struct.xx_stack* [ undef, %entry ], [ %stack.addr.02, %for.body ] + %first = getelementptr inbounds %struct.xx_stack* %top.0.lcssa, i64 0, i32 0 + %1 = load i32* %first, align 4 + ret i32 %1 +} diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index 519e4b93a7..9c547f15c9 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=mips -o %t -; RUN: grep {c\\..*\\.s} %t | count 3 -; RUN: grep {bc1\[tf\]} %t | count 3 +; RUN: grep "c\..*\.s" %t | count 3 +; RUN: grep "bc1[tf]" %t | count 3 ; FIXME: Disabled because branch instructions are generated where ; conditional move instructions are expected. diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index e85a749f7d..e88e3d3755 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 +; RUN: llc < %s -march=mips | grep "b[ne][eq]" | count 1 ; FIXME: Disabled because branch instructions are generated where ; conditional move instructions are expected. 
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll index 7e98ff774d..261fe9db17 100644 --- a/test/CodeGen/Mips/2010-07-20-Switch.ll +++ b/test/CodeGen/Mips/2010-07-20-Switch.ll @@ -7,21 +7,20 @@ entry: %x = alloca i32, align 4 ; <i32*> [#uses=2] store volatile i32 2, i32* %x, align 4 %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1] -; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0) -; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0) -; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2 -; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0) -; PIC-O32: addiu $[[R1:[0-9]+]], $[[R0]], %lo($JTI0_0) -; PIC-O32: sll $[[R2:[0-9]+]], ${{[0-9]+}}, 2 -; PIC-O32: addu $[[R3:[0-9]+]], $[[R2]], $[[R1]] -; PIC-O32: lw $[[R4:[0-9]+]], 0($[[R3]]) +; STATIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2 +; STATIC-O32: lui $[[R1:[0-9]+]], %hi($JTI0_0) +; STATIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; STATIC-O32: lw $[[R3:[0-9]+]], %lo($JTI0_0)($[[R2]]) +; PIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2 +; PIC-O32: lw $[[R1:[0-9]+]], %got($JTI0_0) +; PIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]] +; PIC-O32: lw $[[R4:[0-9]+]], %lo($JTI0_0)($[[R2]]) ; PIC-O32: addu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; PIC-O32: jr $[[R5]] -; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0) -; PIC-N64: daddiu $[[R1:[0-9]+]], $[[R0]], %got_ofst($JTI0_0) -; PIC-N64: dsll $[[R2:[0-9]+]], ${{[0-9]+}}, 3 -; PIC-N64: daddu $[[R3:[0-9]+]], $[[R2:[0-9]+]], $[[R1]] -; PIC-N64: ld $[[R4:[0-9]+]], 0($[[R3]]) +; PIC-N64: dsll $[[R0:[0-9]+]], ${{[0-9]+}}, 3 +; PIC-N64: ld $[[R1:[0-9]+]], %got_page($JTI0_0) +; PIC-N64: daddu $[[R2:[0-9]+]], $[[R0:[0-9]+]], $[[R1]] +; PIC-N64: ld $[[R4:[0-9]+]], %got_ofst($JTI0_0)($[[R2]]) ; PIC-N64: daddu $[[R5:[0-9]+]], $[[R4:[0-9]+]] ; PIC-N64: jr $[[R5]] switch i32 %0, label %bb4 [ @@ -34,7 +33,6 @@ entry: bb1: ; preds = %entry ret i32 2 -; CHECK: STATIC-O32: $BB0_2 bb2: ; preds = %entry ret i32 0 diff --git a/test/CodeGen/Mips/asm-large-immediate.ll 
b/test/CodeGen/Mips/asm-large-immediate.ll new file mode 100644 index 0000000000..246fff615e --- /dev/null +++ b/test/CodeGen/Mips/asm-large-immediate.ll @@ -0,0 +1,10 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s +define void @test() { +entry: +; CHECK: /* result: 68719476738 */ + tail call void asm sideeffect "/* result: ${0:c} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) +; CHECK: /* result: -68719476738 */ + tail call void asm sideeffect "/* result: ${0:n} */", "i,~{dirflag},~{fpsr},~{flags}"( i64 68719476738 ) + ret void +} + diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll new file mode 100644 index 0000000000..82919e7139 --- /dev/null +++ b/test/CodeGen/Mips/fastcc.ll @@ -0,0 +1,253 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s + +@gi0 = external global i32 +@gi1 = external global i32 +@gi2 = external global i32 +@gi3 = external global i32 +@gi4 = external global i32 +@gi5 = external global i32 +@gi6 = external global i32 +@gi7 = external global i32 +@gi8 = external global i32 +@gi9 = external global i32 +@gi10 = external global i32 +@gi11 = external global i32 +@gi12 = external global i32 +@gi13 = external global i32 +@gi14 = external global i32 +@gi15 = external global i32 +@gi16 = external global i32 +@gfa0 = external global float +@gfa1 = external global float +@gfa2 = external global float +@gfa3 = external global float +@gfa4 = external global float +@gfa5 = external global float +@gfa6 = external global float +@gfa7 = external global float +@gfa8 = external global float +@gfa9 = external global float +@gfa10 = external global float +@gfa11 = external global float +@gfa12 = external global float +@gfa13 = external global float +@gfa14 = external global float +@gfa15 = external global float +@gfa16 = external global float +@gfa17 = external global float +@gfa18 = external global float +@gfa19 = external global float +@gfa20 = external global float +@gf0 = external global float +@gf1 = external global float +@gf2 = 
external global float +@gf3 = external global float +@gf4 = external global float +@gf5 = external global float +@gf6 = external global float +@gf7 = external global float +@gf8 = external global float +@gf9 = external global float +@gf10 = external global float +@gf11 = external global float +@gf12 = external global float +@gf13 = external global float +@gf14 = external global float +@gf15 = external global float +@gf16 = external global float +@gf17 = external global float +@gf18 = external global float +@gf19 = external global float +@gf20 = external global float +@g0 = external global i32 +@g1 = external global i32 +@g2 = external global i32 +@g3 = external global i32 +@g4 = external global i32 +@g5 = external global i32 +@g6 = external global i32 +@g7 = external global i32 +@g8 = external global i32 +@g9 = external global i32 +@g10 = external global i32 +@g11 = external global i32 +@g12 = external global i32 +@g13 = external global i32 +@g14 = external global i32 +@g15 = external global i32 +@g16 = external global i32 + +define void @caller0() nounwind { +entry: +; CHECK: caller0 +; CHECK: lw $3 +; CHECK: lw $24 +; CHECK: lw $15 +; CHECK: lw $14 +; CHECK: lw $13 +; CHECK: lw $12 +; CHECK: lw $11 +; CHECK: lw $10 +; CHECK: lw $9 +; CHECK: lw $8 +; CHECK: lw $7 +; CHECK: lw $6 +; CHECK: lw $5 +; CHECK: lw $4 + + %0 = load i32* @gi0, align 4 + %1 = load i32* @gi1, align 4 + %2 = load i32* @gi2, align 4 + %3 = load i32* @gi3, align 4 + %4 = load i32* @gi4, align 4 + %5 = load i32* @gi5, align 4 + %6 = load i32* @gi6, align 4 + %7 = load i32* @gi7, align 4 + %8 = load i32* @gi8, align 4 + %9 = load i32* @gi9, align 4 + %10 = load i32* @gi10, align 4 + %11 = load i32* @gi11, align 4 + %12 = load i32* @gi12, align 4 + %13 = load i32* @gi13, align 4 + %14 = load i32* @gi14, align 4 + %15 = load i32* @gi15, align 4 + %16 = load i32* @gi16, align 4 + tail call fastcc void @callee0(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, 
i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16) + ret void +} + +define internal fastcc void @callee0(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) nounwind noinline { +entry: +; CHECK: callee0 +; CHECK: sw $4 +; CHECK: sw $5 +; CHECK: sw $6 +; CHECK: sw $7 +; CHECK: sw $8 +; CHECK: sw $9 +; CHECK: sw $10 +; CHECK: sw $11 +; CHECK: sw $12 +; CHECK: sw $13 +; CHECK: sw $14 +; CHECK: sw $15 +; CHECK: sw $24 +; CHECK: sw $3 + + store i32 %a0, i32* @g0, align 4 + store i32 %a1, i32* @g1, align 4 + store i32 %a2, i32* @g2, align 4 + store i32 %a3, i32* @g3, align 4 + store i32 %a4, i32* @g4, align 4 + store i32 %a5, i32* @g5, align 4 + store i32 %a6, i32* @g6, align 4 + store i32 %a7, i32* @g7, align 4 + store i32 %a8, i32* @g8, align 4 + store i32 %a9, i32* @g9, align 4 + store i32 %a10, i32* @g10, align 4 + store i32 %a11, i32* @g11, align 4 + store i32 %a12, i32* @g12, align 4 + store i32 %a13, i32* @g13, align 4 + store i32 %a14, i32* @g14, align 4 + store i32 %a15, i32* @g15, align 4 + store i32 %a16, i32* @g16, align 4 + ret void +} + +define void @caller1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind { +entry: +; CHECK: caller1 +; CHECK: lwc1 $f19 +; CHECK: lwc1 $f18 +; CHECK: lwc1 $f17 +; CHECK: lwc1 $f16 +; CHECK: lwc1 $f15 +; CHECK: lwc1 $f14 +; CHECK: lwc1 $f13 +; CHECK: lwc1 $f12 +; CHECK: lwc1 $f11 +; CHECK: lwc1 $f10 +; CHECK: lwc1 $f9 +; CHECK: lwc1 $f8 +; CHECK: lwc1 $f7 +; CHECK: lwc1 $f6 +; CHECK: lwc1 $f5 +; CHECK: lwc1 $f4 +; CHECK: lwc1 $f3 +; CHECK: lwc1 $f2 +; CHECK: lwc1 $f1 +; CHECK: lwc1 $f0 + + %0 = load float* @gfa0, align 4 + %1 = load float* @gfa1, align 4 + %2 = load float* @gfa2, align 4 + %3 = load float* @gfa3, align 
4 + %4 = load float* @gfa4, align 4 + %5 = load float* @gfa5, align 4 + %6 = load float* @gfa6, align 4 + %7 = load float* @gfa7, align 4 + %8 = load float* @gfa8, align 4 + %9 = load float* @gfa9, align 4 + %10 = load float* @gfa10, align 4 + %11 = load float* @gfa11, align 4 + %12 = load float* @gfa12, align 4 + %13 = load float* @gfa13, align 4 + %14 = load float* @gfa14, align 4 + %15 = load float* @gfa15, align 4 + %16 = load float* @gfa16, align 4 + %17 = load float* @gfa17, align 4 + %18 = load float* @gfa18, align 4 + %19 = load float* @gfa19, align 4 + %20 = load float* @gfa20, align 4 + tail call fastcc void @callee1(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20) + ret void +} + +define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline { +entry: +; CHECK: callee1 +; CHECK: swc1 $f0 +; CHECK: swc1 $f1 +; CHECK: swc1 $f2 +; CHECK: swc1 $f3 +; CHECK: swc1 $f4 +; CHECK: swc1 $f5 +; CHECK: swc1 $f6 +; CHECK: swc1 $f7 +; CHECK: swc1 $f8 +; CHECK: swc1 $f9 +; CHECK: swc1 $f10 +; CHECK: swc1 $f11 +; CHECK: swc1 $f12 +; CHECK: swc1 $f13 +; CHECK: swc1 $f14 +; CHECK: swc1 $f15 +; CHECK: swc1 $f16 +; CHECK: swc1 $f17 +; CHECK: swc1 $f18 +; CHECK: swc1 $f19 + + store float %a0, float* @gf0, align 4 + store float %a1, float* @gf1, align 4 + store float %a2, float* @gf2, align 4 + store float %a3, float* @gf3, align 4 + store float %a4, float* @gf4, align 4 + store float %a5, float* @gf5, align 4 + store float %a6, float* @gf6, align 4 + store float %a7, float* @gf7, align 4 + store float %a8, float* @gf8, align 4 + store float %a9, float* @gf9, align 4 + store 
float %a10, float* @gf10, align 4 + store float %a11, float* @gf11, align 4 + store float %a12, float* @gf12, align 4 + store float %a13, float* @gf13, align 4 + store float %a14, float* @gf14, align 4 + store float %a15, float* @gf15, align 4 + store float %a16, float* @gf16, align 4 + store float %a17, float* @gf17, align 4 + store float %a18, float* @gf18, align 4 + store float %a19, float* @gf19, align 4 + store float %a20, float* @gf20, align 4 + ret void +} + diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll index 8b1f71b69f..e33021f880 100644 --- a/test/CodeGen/Mips/i64arg.ll +++ b/test/CodeGen/Mips/i64arg.ll @@ -10,8 +10,8 @@ entry: ; CHECK: jalr tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind ; CHECK: lw $25, %call16(ff2) -; CHECK: lw $[[R2:[0-9]+]], 80($sp) -; CHECK: lw $[[R3:[0-9]+]], 84($sp) +; CHECK: lw $[[R2:[0-9]+]], 88($sp) +; CHECK: lw $[[R3:[0-9]+]], 92($sp) ; CHECK: addu $4, $zero, $[[R2]] ; CHECK: addu $5, $zero, $[[R3]] ; CHECK: jalr $25 diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-r-1.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-bad-r-1.ll deleted file mode 100644 index f5255fe0a3..0000000000 --- a/test/CodeGen/Mips/inlineasm-cnstrnt-bad-r-1.ll +++ /dev/null @@ -1,17 +0,0 @@ -; -; Register constraint "r" shouldn't take long long unless -; The target is 64 bit. 
-; -; RUN: not llc -march=mipsel < %s 2> %t -; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s - -define i32 @main() nounwind { -entry: - -; r with long long -;CHECK-ERRORS: error: couldn't allocate output register for constraint 'r' - - tail call i64 asm sideeffect "addi $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind - ret i32 0 -} - diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll index 2dcc10def2..d75f7f2f80 100644 --- a/test/CodeGen/Mips/inlineasm-operand-code.ll +++ b/test/CodeGen/Mips/inlineasm-operand-code.ll @@ -29,5 +29,23 @@ entry: ;CHECK: #NO_APP tail call i32 asm sideeffect "addi $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) nounwind +; z with -3 +;CHECK: #APP +;CHECK: addi ${{[0-9]+}},${{[0-9]+}},-3 +;CHECK: #NO_APP + tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) nounwind + +; z with 0 +;CHECK: #APP +;CHECK: addi ${{[0-9]+}},${{[0-9]+}},$0 +;CHECK: #NO_APP + tail call i32 asm sideeffect "addi $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind + +; a long long in 32 bit mode (use to assert) +;CHECK: #APP +;CHECK: addi ${{[0-9]+}},${{[0-9]+}},3 +;CHECK: #NO_APP + tail call i64 asm sideeffect "addi $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind + ret i32 0 } diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll index 0272dea5a7..c81cc764b4 100644 --- a/test/CodeGen/Mips/largeimmprinting.ll +++ b/test/CodeGen/Mips/largeimmprinting.ll @@ -7,7 +7,7 @@ define void @f() nounwind { entry: ; CHECK: lui $at, 65534 -; CHECK: addiu $at, $at, -16 +; CHECK: addiu $at, $at, -24 ; CHECK: addu $sp, $sp, $at %agg.tmp = alloca %struct.S1, align 1 diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll new file mode 100644 index 0000000000..ef95d0011c --- /dev/null +++ b/test/CodeGen/Mips/longbranch.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=mipsel -force-mips-long-branch < %s | FileCheck %s + +@g0 = external global i32 + 
+define void @foo1(i32 %s) nounwind { +entry: +; CHECK: lw $[[R0:[a-z0-9]+]], %got($BB0_3)(${{[a-z0-9]+}}) +; CHECK: addiu $[[R1:[a-z0-9]+]], $[[R0]], %lo($BB0_3) +; CHECK: jr $[[R1]] + + %tobool = icmp eq i32 %s, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = load i32* @g0, align 4 + %add = add nsw i32 %0, 12 + store i32 %add, i32* @g0, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + diff --git a/test/CodeGen/Mips/machineverifier.ll b/test/CodeGen/Mips/machineverifier.ll new file mode 100644 index 0000000000..c673fe557e --- /dev/null +++ b/test/CodeGen/Mips/machineverifier.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=mipsel -verify-machineinstrs +; Make sure machine verifier understands the last instruction of a basic block +; is not the terminator instruction after delay slot filler pass is run. + +@g = external global i32 + +define void @foo() nounwind { +entry: + %0 = load i32* @g, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %add = add nsw i32 %0, 10 + store i32 %add, i32* @g, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + diff --git a/test/CodeGen/Mips/memcpy.ll b/test/CodeGen/Mips/memcpy.ll new file mode 100644 index 0000000000..39764a9363 --- /dev/null +++ b/test/CodeGen/Mips/memcpy.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +%struct.S1 = type { i32, [41 x i8] } + +@.str = private unnamed_addr constant [31 x i8] c"abcdefghijklmnopqrstuvwxyzABCD\00", align 1 + +define void @foo1(%struct.S1* %s1, i8 signext %n) nounwind { +entry: +; CHECK-NOT: call16(memcpy + + %arraydecay = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 0 + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false) + %arrayidx = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 40 + store i8 
%n, i8* %arrayidx, align 1 + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index 9de78c11be..d5eac994c0 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -13,16 +13,16 @@ entry: ; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1) ; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1) ; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]]) -; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) -; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) -; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) -; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) -; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]]) ; CHECK: sw $[[R6]], 36($sp) +; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]]) ; CHECK: sw $[[R5]], 32($sp) +; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]]) ; CHECK: sw $[[R4]], 28($sp) +; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]]) ; CHECK: sw $[[R3]], 24($sp) +; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]]) ; CHECK: sw $[[R7]], 20($sp) +; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]]) ; CHECK: sw $[[R2]], 16($sp) ; CHECK: lw $7, 4($[[R0]]) ; CHECK: lw $6, %lo(f1.s1)($[[R1]]) @@ -43,16 +43,16 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval) define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind { entry: -; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: lw $4, 80($sp) -; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp) -; CHECK: lw $[[R3:[0-9]+]], 64($sp) -; CHECK: lw $[[R4:[0-9]+]], 68($sp) -; CHECK: lw $[[R2:[0-9]+]], 60($sp) -; CHECK: lh $[[R1:[0-9]+]], 58($sp) -; CHECK: lb $[[R0:[0-9]+]], 56($sp) +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw $7, 68($sp) +; CHECK: sw $6, 64($sp) +; CHECK: lw $4, 88($sp) +; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp) +; CHECK: lw $[[R3:[0-9]+]], 72($sp) +; CHECK: lw $[[R4:[0-9]+]], 76($sp) +; CHECK: lw $[[R2:[0-9]+]], 68($sp) +; CHECK: lh $[[R1:[0-9]+]], 66($sp) +; CHECK: lb $[[R0:[0-9]+]], 64($sp) ; CHECK: sw $[[R0]], 32($sp) ; 
CHECK: sw $[[R1]], 28($sp) ; CHECK: sw $[[R2]], 24($sp) @@ -80,13 +80,13 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float) define void @f3(%struct.S2* nocapture byval %s2) nounwind { entry: -; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: sw $4, 48($sp) -; CHECK: lw $4, 48($sp) -; CHECK: lw $[[R0:[0-9]+]], 60($sp) +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw $7, 68($sp) +; CHECK: sw $6, 64($sp) +; CHECK: sw $5, 60($sp) +; CHECK: sw $4, 56($sp) +; CHECK: lw $4, 56($sp) +; CHECK: lw $[[R0:[0-9]+]], 68($sp) ; CHECK: sw $[[R0]], 24($sp) %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0 @@ -99,13 +99,13 @@ entry: define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: -; CHECK: addiu $sp, $sp, -48 -; CHECK: sw $7, 60($sp) -; CHECK: sw $6, 56($sp) -; CHECK: sw $5, 52($sp) -; CHECK: lw $4, 60($sp) -; CHECK: lw $[[R1:[0-9]+]], 80($sp) -; CHECK: lb $[[R0:[0-9]+]], 52($sp) +; CHECK: addiu $sp, $sp, -56 +; CHECK: sw $7, 68($sp) +; CHECK: sw $6, 64($sp) +; CHECK: sw $5, 60($sp) +; CHECK: lw $4, 68($sp) +; CHECK: lw $[[R1:[0-9]+]], 88($sp) +; CHECK: lb $[[R0:[0-9]+]], 60($sp) ; CHECK: sw $[[R0]], 32($sp) ; CHECK: sw $[[R1]], 24($sp) diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll index 4a3d9ab837..49d614c820 100644 --- a/test/CodeGen/Mips/o32_cc_vararg.ll +++ b/test/CodeGen/Mips/o32_cc_vararg.ll @@ -29,11 +29,11 @@ entry: ret i32 %tmp ; CHECK: va1: -; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $7, 28($sp) -; CHECK: sw $6, 24($sp) -; CHECK: sw $5, 20($sp) -; CHECK: lw $2, 20($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: sw $6, 32($sp) +; CHECK: sw $5, 28($sp) +; CHECK: lw $2, 28($sp) } ; check whether the variable double argument will be accessed from the 8-byte @@ -55,11 +55,11 @@ entry: ret double %tmp ; CHECK: va2: -; CHECK: addiu $sp, $sp, -16 -; 
CHECK: sw $7, 28($sp) -; CHECK: sw $6, 24($sp) -; CHECK: sw $5, 20($sp) -; CHECK: addiu $[[R0:[0-9]+]], $sp, 20 +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: sw $6, 32($sp) +; CHECK: sw $5, 28($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 28 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] @@ -83,10 +83,10 @@ entry: ret i32 %tmp ; CHECK: va3: -; CHECK: addiu $sp, $sp, -16 -; CHECK: sw $7, 28($sp) -; CHECK: sw $6, 24($sp) -; CHECK: lw $2, 24($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: sw $6, 32($sp) +; CHECK: lw $2, 32($sp) } ; double @@ -106,11 +106,11 @@ entry: ret double %tmp ; CHECK: va4: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: sw $6, 32($sp) -; CHECK: addiu ${{[0-9]+}}, $sp, 32 -; CHECK: ldc1 $f0, 32($sp) +; CHECK: addiu $sp, $sp, -32 +; CHECK: sw $7, 44($sp) +; CHECK: sw $6, 40($sp) +; CHECK: addiu ${{[0-9]+}}, $sp, 40 +; CHECK: ldc1 $f0, 40($sp) } ; int @@ -134,9 +134,9 @@ entry: ret i32 %tmp ; CHECK: va5: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: lw $2, 36($sp) +; CHECK: addiu $sp, $sp, -32 +; CHECK: sw $7, 44($sp) +; CHECK: lw $2, 44($sp) } ; double @@ -160,9 +160,9 @@ entry: ret double %tmp ; CHECK: va6: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: addiu $[[R0:[0-9]+]], $sp, 36 +; CHECK: addiu $sp, $sp, -32 +; CHECK: sw $7, 44($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 44 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] @@ -188,8 +188,8 @@ entry: ret i32 %tmp ; CHECK: va7: -; CHECK: addiu $sp, $sp, -24 -; CHECK: lw $2, 40($sp) +; CHECK: addiu $sp, $sp, -32 +; CHECK: lw $2, 48($sp) } ; double @@ -211,9 +211,9 @@ entry: ret double %tmp ; CHECK: va8: -; CHECK: addiu $sp, $sp, -32 -; CHECK: addiu ${{[0-9]+}}, $sp, 48 -; CHECK: ldc1 $f0, 48($sp) +; CHECK: addiu $sp, $sp, -40 +; CHECK: addiu 
${{[0-9]+}}, $sp, 56 +; CHECK: ldc1 $f0, 56($sp) } ; int @@ -237,8 +237,8 @@ entry: ret i32 %tmp ; CHECK: va9: -; CHECK: addiu $sp, $sp, -32 -; CHECK: lw $2, 52($sp) +; CHECK: addiu $sp, $sp, -40 +; CHECK: lw $2, 60($sp) } ; double @@ -262,8 +262,8 @@ entry: ret double %tmp ; CHECK: va10: -; CHECK: addiu $sp, $sp, -32 -; CHECK: addiu $[[R0:[0-9]+]], $sp, 52 +; CHECK: addiu $sp, $sp, -40 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 60 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] diff --git a/test/CodeGen/Mips/stacksize.ll b/test/CodeGen/Mips/stacksize.ll new file mode 100644 index 0000000000..42021b2151 --- /dev/null +++ b/test/CodeGen/Mips/stacksize.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s + +define i32 @foo(i32 %a) nounwind readnone { +entry: +; check that stack size is zero. +; CHECK-NOT: addiu $sp, $sp + %add = add nsw i32 %a, 1 + ret i32 %add +} diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll new file mode 100644 index 0000000000..d681091f4c --- /dev/null +++ b/test/CodeGen/Mips/tls-alias.ll @@ -0,0 +1,10 @@ +; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s + +@foo = thread_local global i32 42 +@bar = hidden alias i32* @foo + +define i32* @zed() { +; CHECK: __tls_get_addr +; CHECK-NEXT: %tlsgd(bar) + ret i32* @bar +} diff --git a/test/CodeGen/Mips/tls-models.ll b/test/CodeGen/Mips/tls-models.ll new file mode 100644 index 0000000000..8f5789ec79 --- /dev/null +++ b/test/CodeGen/Mips/tls-models.ll @@ -0,0 +1,113 @@ +; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=CHECK-PIC %s +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck -check-prefix=CHECK-NONPIC %s + +@external_gd = external thread_local global i32 +@internal_gd = internal thread_local global i32 42 + +@external_ld = external thread_local(localdynamic) global i32 +@internal_ld = internal thread_local(localdynamic) 
global i32 42 + +@external_ie = external thread_local(initialexec) global i32 +@internal_ie = internal thread_local(initialexec) global i32 42 + +@external_le = external thread_local(localexec) global i32 +@internal_le = internal thread_local(localexec) global i32 42 + +; ----- no model specified ----- + +define i32* @f1() { +entry: + ret i32* @external_gd + + ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. + ; CHECK-NONPIC: f1: + ; CHECK-NONPIC: %gottprel + ; CHECK-PIC: f1: + ; CHECK-PIC: %tlsgd +} + +define i32* @f2() { +entry: + ret i32* @internal_gd + + ; Non-PIC code can use local exec, PIC code can use local dynamic. + ; CHECK-NONPIC: f2: + ; CHECK-NONPIC: %tprel_hi + ; CHECK-PIC: f2: + ; CHECK-PIC: %tlsldm +} + + +; ----- localdynamic specified ----- + +define i32* @f3() { +entry: + ret i32* @external_ld + + ; Non-PIC code can use initial exec, PIC should use local dynamic. + ; CHECK-NONPIC: f3: + ; CHECK-NONPIC: %gottprel + ; CHECK-PIC: f3: + ; CHECK-PIC: %tlsldm +} + +define i32* @f4() { +entry: + ret i32* @internal_ld + + ; Non-PIC code can use local exec, PIC code can use local dynamic. + ; CHECK-NONPIC: f4: + ; CHECK-NONPIC: %tprel_hi + ; CHECK-PIC: f4: + ; CHECK-PIC: %tlsldm +} + + +; ----- initialexec specified ----- + +define i32* @f5() { +entry: + ret i32* @external_ie + + ; Non-PIC and PIC code will use initial exec as specified. + ; CHECK-NONPIC: f5: + ; CHECK-NONPIC: %gottprel + ; CHECK-PIC: f5: + ; CHECK-PIC: %gottprel +} + +define i32* @f6() { +entry: + ret i32* @internal_ie + + ; Non-PIC code can use local exec, PIC code use initial exec as specified. + ; CHECK-NONPIC: f6: + ; CHECK-NONPIC: %tprel_hi + ; CHECK-PIC: f6: + ; CHECK-PIC: %gottprel +} + + +; ----- localexec specified ----- + +define i32* @f7() { +entry: + ret i32* @external_le + + ; Non-PIC and PIC code will use local exec as specified. 
+ ; CHECK-NONPIC: f7: + ; CHECK-NONPIC: %tprel_hi + ; CHECK-PIC: f7: + ; CHECK-PIC: %tprel_hi +} + +define i32* @f8() { +entry: + ret i32* @internal_le + + ; Non-PIC and PIC code will use local exec as specified. + ; CHECK-NONPIC: f8: + ; CHECK-NONPIC: %tprel_hi + ; CHECK-PIC: f8: + ; CHECK-PIC: %tprel_hi +} diff --git a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll index 5d1df468a6..43736601fe 100644 --- a/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll +++ b/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll @@ -1,7 +1,7 @@ ; This function should have exactly one call to fixdfdi, no more! ; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ -; RUN: grep {bl .*fixdfdi} | count 1 +; RUN: grep "bl .*fixdfdi" | count 1 define double @test2(double %tmp.7705) { %mem_tmp.2.0.in = fptosi double %tmp.7705 to i64 ; <i64> [#uses=1] diff --git a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll index 97bb48e96e..aeb28af4be 100644 --- a/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll +++ b/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | not grep {, f1} +; RUN: llc < %s | not grep ", f1" target datalayout = "E-p:32:32" target triple = "powerpc-apple-darwin8.2.0" diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll index 969772ee2b..7e845382a8 100644 --- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll +++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ -; RUN: grep {vspltish v.*, 10} +; RUN: grep "vspltish v.*, 10" define void @test(<8 x i16>* %P) { %tmp = load <8 x i16>* %P ; <<8 x i16>> [#uses=1] diff --git a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll index 86fd947502..73736c57fe 
100644 --- a/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll +++ b/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {foo r3, r4} -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep {bari r3, 47} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "foo r3, r4" +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8.8.0 | grep "bari r3, 47" ; PR1351 diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll index 72e93a9cce..b85792c6f4 100644 --- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll +++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll @@ -1,7 +1,7 @@ target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "powerpc-apple-darwin8.8.0" -; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30} +; RUN: llc < %s -march=ppc32 | grep "rlwinm r3, r3, 23, 30, 30" ; PR1473 define zeroext i8 @foo(i16 zeroext %a) { diff --git a/test/CodeGen/PowerPC/Frames-leaf.ll b/test/CodeGen/PowerPC/Frames-leaf.ll index c2e1d6bddc..7b1c464f9e 100644 --- a/test/CodeGen/PowerPC/Frames-leaf.ll +++ b/test/CodeGen/PowerPC/Frames-leaf.ll @@ -1,35 +1,35 @@ ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {stw r31, 20(r1)} +; RUN: not grep "stw r31, 20(r1)" ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {stwu r1, -.*(r1)} +; RUN: not grep "stwu r1, -.*(r1)" ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {addi r1, r1, } +; RUN: not grep "addi r1, r1, " ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {lwz r31, 20(r1)} +; RUN: not grep "lwz r31, 20(r1)" ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ -; RUN: not grep {stw r31, 20(r1)} +; RUN: not grep "stw r31, 20(r1)" ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ -; RUN: not grep {stwu r1, -.*(r1)} +; RUN: not 
grep "stwu r1, -.*(r1)" ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ -; RUN: not grep {addi r1, r1, } +; RUN: not grep "addi r1, r1, " ; RUN: llc < %s -march=ppc32 -disable-fp-elim | \ -; RUN: not grep {lwz r31, 20(r1)} +; RUN: not grep "lwz r31, 20(r1)" ; RUN: llc < %s -march=ppc64 | \ -; RUN: not grep {std r31, 40(r1)} +; RUN: not grep "std r31, 40(r1)" ; RUN: llc < %s -march=ppc64 | \ -; RUN: not grep {stdu r1, -.*(r1)} +; RUN: not grep "stdu r1, -.*(r1)" ; RUN: llc < %s -march=ppc64 | \ -; RUN: not grep {addi r1, r1, } +; RUN: not grep "addi r1, r1, " ; RUN: llc < %s -march=ppc64 | \ -; RUN: not grep {ld r31, 40(r1)} +; RUN: not grep "ld r31, 40(r1)" ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ -; RUN: not grep {stw r31, 40(r1)} +; RUN: not grep "stw r31, 40(r1)" ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ -; RUN: not grep {stdu r1, -.*(r1)} +; RUN: not grep "stdu r1, -.*(r1)" ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ -; RUN: not grep {addi r1, r1, } +; RUN: not grep "addi r1, r1, " ; RUN: llc < %s -march=ppc64 -disable-fp-elim | \ -; RUN: not grep {ld r31, 40(r1)} +; RUN: not grep "ld r31, 40(r1)" define i32* @f1() { %tmp = alloca i32, i32 2 ; <i32*> [#uses=1] diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll index ecd5ecd2ec..0f6bd1021f 100644 --- a/test/CodeGen/PowerPC/Frames-small.ll +++ b/test/CodeGen/PowerPC/Frames-small.ll @@ -1,26 +1,26 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -; RUN: not grep {stw r31, -4(r1)} %t1 -; RUN: grep {stwu r1, -16448(r1)} %t1 -; RUN: grep {addi r1, r1, 16448} %t1 +; RUN: not grep "stw r31, -4(r1)" %t1 +; RUN: grep "stwu r1, -16448(r1)" %t1 +; RUN: grep "addi r1, r1, 16448" %t1 ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {lwz r31, -4(r1)} +; RUN: not grep "lwz r31, -4(r1)" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t2 -; RUN: grep {stw r31, -4(r1)} %t2 -; RUN: grep {stwu r1, 
-16448(r1)} %t2 -; RUN: grep {addi r1, r1, 16448} %t2 -; RUN: grep {lwz r31, -4(r1)} %t2 +; RUN: grep "stw r31, -4(r1)" %t2 +; RUN: grep "stwu r1, -16448(r1)" %t2 +; RUN: grep "addi r1, r1, 16448" %t2 +; RUN: grep "lwz r31, -4(r1)" %t2 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -; RUN: not grep {std r31, -8(r1)} %t3 -; RUN: grep {stdu r1, -16496(r1)} %t3 -; RUN: grep {addi r1, r1, 16496} %t3 -; RUN: not grep {ld r31, -8(r1)} %t3 +; RUN: not grep "std r31, -8(r1)" %t3 +; RUN: grep "stdu r1, -16496(r1)" %t3 +; RUN: grep "addi r1, r1, 16496" %t3 +; RUN: not grep "ld r31, -8(r1)" %t3 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t4 -; RUN: grep {std r31, -8(r1)} %t4 -; RUN: grep {stdu r1, -16512(r1)} %t4 -; RUN: grep {addi r1, r1, 16512} %t4 -; RUN: grep {ld r31, -8(r1)} %t4 +; RUN: grep "std r31, -8(r1)" %t4 +; RUN: grep "stdu r1, -16512(r1)" %t4 +; RUN: grep "addi r1, r1, 16512" %t4 +; RUN: grep "ld r31, -8(r1)" %t4 define i32* @f1() { %tmp = alloca i32, i32 4095 ; <i32*> [#uses=1] diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 7b0d69cb3b..6f985c819f 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {stw r4, 32751} +; RUN: grep "stw r4, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {stw r4, 32751} +; RUN: grep "stw r4, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep {std r4, 9024} +; RUN: grep "std r4, 9024" define void @test() nounwind { store i32 0, i32* inttoptr (i64 48725999 to i32*) diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll index 932ad7a63c..de3aa7c317 100644 --- a/test/CodeGen/PowerPC/a2-fp-basic.ll +++ b/test/CodeGen/PowerPC/a2-fp-basic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 
-march=ppc64 -mcpu=a2 | FileCheck %s +; RUN: llc < %s -march=ppc64 -mcpu=a2 -fp-contract=fast | FileCheck %s %0 = type { double, double } diff --git a/test/CodeGen/PowerPC/and-imm.ll b/test/CodeGen/PowerPC/and-imm.ll index 64a45e50c0..6fd484b40b 100644 --- a/test/CodeGen/PowerPC/and-imm.ll +++ b/test/CodeGen/PowerPC/and-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | not grep {ori\\|lis} +; RUN: llc < %s -march=ppc32 | not grep "ori\|lis" ; andi. r3, r3, 32769 define i32 @test(i32 %X) { diff --git a/test/CodeGen/PowerPC/big-endian-actual-args.ll b/test/CodeGen/PowerPC/big-endian-actual-args.ll index 009f46811e..898ad7cb85 100644 --- a/test/CodeGen/PowerPC/big-endian-actual-args.ll +++ b/test/CodeGen/PowerPC/big-endian-actual-args.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ -; RUN: grep {addc 4, 4, 6} +; RUN: grep "addc 4, 4, 6" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ -; RUN: grep {adde 3, 3, 5} +; RUN: grep "adde 3, 3, 5" define i64 @foo(i64 %x, i64 %y) { %z = add i64 %x, %y diff --git a/test/CodeGen/PowerPC/big-endian-call-result.ll b/test/CodeGen/PowerPC/big-endian-call-result.ll index fe85404cb9..760833ce20 100644 --- a/test/CodeGen/PowerPC/big-endian-call-result.ll +++ b/test/CodeGen/PowerPC/big-endian-call-result.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ -; RUN: grep {addic 4, 4, 1} +; RUN: grep "addic 4, 4, 1" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-unknown-linux-gnu | \ -; RUN: grep {addze 3, 3} +; RUN: grep "addze 3, 3" declare i64 @foo() diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll index cc02e406aa..dda1538f1c 100644 --- a/test/CodeGen/PowerPC/branch-opt.ll +++ b/test/CodeGen/PowerPC/branch-opt.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 | \ -; RUN: grep {b LBB.*} | count 4 +; RUN: grep "b LBB.*" | count 4 target datalayout = "E-p:32:32" target triple = "powerpc-apple-darwin8.7.0" diff 
--git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll index 29bcb20811..dcdda57214 100644 --- a/test/CodeGen/PowerPC/calls.ll +++ b/test/CodeGen/PowerPC/calls.ll @@ -1,11 +1,11 @@ ; Test various forms of calls. ; RUN: llc < %s -march=ppc32 | \ -; RUN: grep {bl } | count 1 +; RUN: grep "bl " | count 1 ; RUN: llc < %s -march=ppc32 | \ -; RUN: grep {bctrl} | count 1 +; RUN: grep "bctrl" | count 1 ; RUN: llc < %s -march=ppc32 | \ -; RUN: grep {bla } | count 1 +; RUN: grep "bla " | count 1 declare void @foo() diff --git a/test/CodeGen/PowerPC/coalesce-ext.ll b/test/CodeGen/PowerPC/coalesce-ext.ll new file mode 100644 index 0000000000..cc80f83307 --- /dev/null +++ b/test/CodeGen/PowerPC/coalesce-ext.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=ppc64 -mtriple=powerpc64-apple-darwin < %s | FileCheck %s +; Check that the peephole optimizer knows about sext and zext instructions. +; CHECK: test1sext +define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { + %C = add i64 %A, %B + ; CHECK: add [[SUM:r[0-9]+]], r3, r4 + %D = trunc i64 %C to i32 + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]] + store volatile i64 %F, i64 *%P2 + ; CHECK: std [[EXT]] + store volatile i32 %D, i32* %P + ; Reuse low bits of extended register, don't extend live range of SUM. 
+ ; CHECK: stw [[EXT]] + ret i32 %D +} diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll index 92d1dbe902..94c5c0290f 100644 --- a/test/CodeGen/PowerPC/compare-simm.ll +++ b/test/CodeGen/PowerPC/compare-simm.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ -; RUN: grep {cmpwi cr0, r3, -1} +; RUN: grep "cmpwi cr0, r3, -1" define i32 @test(i32 %x) nounwind { %c = icmp eq i32 %x, -1 diff --git a/test/CodeGen/PowerPC/constants.ll b/test/CodeGen/PowerPC/constants.ll index 8901e02d3b..9efca916d6 100644 --- a/test/CodeGen/PowerPC/constants.ll +++ b/test/CodeGen/PowerPC/constants.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -march=ppc32 | \ ; RUN: grep ori | count 3 ; RUN: llc < %s -march=ppc32 | \ -; RUN: grep {li } | count 4 +; RUN: grep "li " | count 4 define i32 @f1() { entry: diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll new file mode 100644 index 0000000000..dcea06f29e --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-s000.ll @@ -0,0 +1,156 @@ +; ModuleID = 'tsc_s000.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +@Y = common global [16000 x double] zeroinitializer, align 32 +@X = common global [16000 x double] zeroinitializer, align 32 +@Z = common global [16000 x double] zeroinitializer, align 32 +@U = common global [16000 x double] zeroinitializer, align 32 +@V = common global [16000 x double] zeroinitializer, align 32 +@aa = common global [256 x [256 x double]] zeroinitializer, align 32 +@bb = common global [256 x [256 x double]] zeroinitializer, align 32 +@cc = common global [256 x [256 x double]] zeroinitializer, align 32 +@array = common global [65536 x double] zeroinitializer, align 32 +@x = common global [16000 x double] zeroinitializer, align 32 +@temp = common global double 
0.000000e+00, align 8 +@temp_int = common global i32 0, align 4 +@a = common global [16000 x double] zeroinitializer, align 32 +@b = common global [16000 x double] zeroinitializer, align 32 +@c = common global [16000 x double] zeroinitializer, align 32 +@d = common global [16000 x double] zeroinitializer, align 32 +@e = common global [16000 x double] zeroinitializer, align 32 +@tt = common global [256 x [256 x double]] zeroinitializer, align 32 +@indx = common global [16000 x i32] zeroinitializer, align 32 +@xx = common global double* null, align 8 +@yy = common global double* null, align 8 + +define i32 @s000() nounwind { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end, %entry + %nl.010 = phi i32 [ 0, %entry ], [ %inc7, %for.end ] + br label %for.body3 + +for.body3: ; preds = %for.body3, %for.cond1.preheader + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ] + %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv + %0 = load double* %arrayidx, align 32, !tbaa !0 + %add = fadd double %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv + store double %add, double* %arrayidx5, align 32, !tbaa !0 + %indvars.iv.next11 = or i64 %indvars.iv, 1 + %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11 + %1 = load double* %arrayidx.1, align 8, !tbaa !0 + %add.1 = fadd double %1, 1.000000e+00 + %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11 + store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0 + %indvars.iv.next.112 = or i64 %indvars.iv, 2 + %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112 + %2 = load double* %arrayidx.2, align 16, !tbaa !0 + %add.2 = fadd double %2, 1.000000e+00 + %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112 + store double %add.2, double* 
%arrayidx5.2, align 16, !tbaa !0 + %indvars.iv.next.213 = or i64 %indvars.iv, 3 + %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213 + %3 = load double* %arrayidx.3, align 8, !tbaa !0 + %add.3 = fadd double %3, 1.000000e+00 + %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213 + store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0 + %indvars.iv.next.314 = or i64 %indvars.iv, 4 + %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314 + %4 = load double* %arrayidx.4, align 32, !tbaa !0 + %add.4 = fadd double %4, 1.000000e+00 + %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314 + store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0 + %indvars.iv.next.415 = or i64 %indvars.iv, 5 + %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415 + %5 = load double* %arrayidx.5, align 8, !tbaa !0 + %add.5 = fadd double %5, 1.000000e+00 + %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415 + store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0 + %indvars.iv.next.516 = or i64 %indvars.iv, 6 + %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516 + %6 = load double* %arrayidx.6, align 16, !tbaa !0 + %add.6 = fadd double %6, 1.000000e+00 + %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516 + store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0 + %indvars.iv.next.617 = or i64 %indvars.iv, 7 + %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617 + %7 = load double* %arrayidx.7, align 8, !tbaa !0 + %add.7 = fadd double %7, 1.000000e+00 + %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617 + store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0 + %indvars.iv.next.718 
= or i64 %indvars.iv, 8 + %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718 + %8 = load double* %arrayidx.8, align 32, !tbaa !0 + %add.8 = fadd double %8, 1.000000e+00 + %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718 + store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0 + %indvars.iv.next.819 = or i64 %indvars.iv, 9 + %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819 + %9 = load double* %arrayidx.9, align 8, !tbaa !0 + %add.9 = fadd double %9, 1.000000e+00 + %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819 + store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0 + %indvars.iv.next.920 = or i64 %indvars.iv, 10 + %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920 + %10 = load double* %arrayidx.10, align 16, !tbaa !0 + %add.10 = fadd double %10, 1.000000e+00 + %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920 + store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0 + %indvars.iv.next.1021 = or i64 %indvars.iv, 11 + %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021 + %11 = load double* %arrayidx.11, align 8, !tbaa !0 + %add.11 = fadd double %11, 1.000000e+00 + %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021 + store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0 + %indvars.iv.next.1122 = or i64 %indvars.iv, 12 + %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122 + %12 = load double* %arrayidx.12, align 32, !tbaa !0 + %add.12 = fadd double %12, 1.000000e+00 + %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122 + store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0 + %indvars.iv.next.1223 = or i64 
%indvars.iv, 13 + %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223 + %13 = load double* %arrayidx.13, align 8, !tbaa !0 + %add.13 = fadd double %13, 1.000000e+00 + %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223 + store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0 + %indvars.iv.next.1324 = or i64 %indvars.iv, 14 + %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324 + %14 = load double* %arrayidx.14, align 16, !tbaa !0 + %add.14 = fadd double %14, 1.000000e+00 + %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324 + store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0 + %indvars.iv.next.1425 = or i64 %indvars.iv, 15 + %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425 + %15 = load double* %arrayidx.15, align 8, !tbaa !0 + %add.15 = fadd double %15, 1.000000e+00 + %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425 + store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0 + %indvars.iv.next.15 = add i64 %indvars.iv, 16 + %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32 + %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000 + br i1 %exitcond.15, label %for.end, label %for.body3 + +for.end: ; preds = %for.body3 + %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds 
([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind + %inc7 = add nsw i32 %nl.010, 1 + %exitcond = icmp eq i32 %inc7, 400000 + br i1 %exitcond, label %for.end8, label %for.cond1.preheader + +for.end8: ; preds = %for.end + ret i32 0 + +; CHECK: @s000 +; CHECK: mtctr +; CHECK: bdnz +} + +declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double) + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll new file mode 100644 index 0000000000..eae8c38eee --- /dev/null +++ b/test/CodeGen/PowerPC/ctrloop-sums.ll @@ -0,0 +1,134 @@ +; ModuleID = 'SingleSource/Regression/C/sumarray2d.c' +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s -march=ppc64 | FileCheck %s + +@.str = private unnamed_addr constant [23 x i8] c"Sum(Array[%d,%d] = %d\0A\00", align 1 + +define i32 @SumArray([100 x i32]* nocapture %Array, i32 %NumI, i32 %NumJ) nounwind readonly { +entry: + %cmp12 = icmp eq i32 %NumI, 0 + br i1 %cmp12, label %for.end8, label %for.cond1.preheader.lr.ph + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp29 = icmp eq i32 %NumJ, 0 + br i1 %cmp29, label %for.inc6, label %for.body3.lr.ph.us + +for.inc6.us: ; preds = %for.body3.us + %indvars.iv.next17 = add i64 %indvars.iv16, 1 + %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32 + %exitcond19 = icmp eq i32 %lftr.wideiv18, %NumI + br i1 %exitcond19, label %for.end8, label %for.body3.lr.ph.us + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ] + %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, 
%for.body3.us ] + %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv + %0 = load i32* %arrayidx5.us, align 4, !tbaa !0 + %add.us = add nsw i32 %0, %Result.111.us + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %NumJ + br i1 %exitcond, label %for.inc6.us, label %for.body3.us + +for.body3.lr.ph.us: ; preds = %for.inc6.us, %for.cond1.preheader.lr.ph + %indvars.iv16 = phi i64 [ %indvars.iv.next17, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ] + %Result.014.us = phi i32 [ %add.us, %for.inc6.us ], [ 0, %for.cond1.preheader.lr.ph ] + br label %for.body3.us + +for.inc6: ; preds = %for.inc6, %for.cond1.preheader.lr.ph + %i.013 = phi i32 [ %inc7, %for.inc6 ], [ 0, %for.cond1.preheader.lr.ph ] + %inc7 = add i32 %i.013, 1 + %exitcond20 = icmp eq i32 %inc7, %NumI + br i1 %exitcond20, label %for.end8, label %for.inc6 + +for.end8: ; preds = %for.inc6.us, %for.inc6, %entry + %Result.0.lcssa = phi i32 [ 0, %entry ], [ %add.us, %for.inc6.us ], [ 0, %for.inc6 ] + ret i32 %Result.0.lcssa +; CHECK: @SumArray +; CHECK: mtctr +; CHECK: bdnz +} + +define i32 @main() nounwind { +entry: + %Array = alloca [100 x [100 x i32]], align 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ] + %0 = trunc i64 %indvars.iv33 to i32 + %sub = sub i32 0, %0 + %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33 + store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0 + %indvars.iv.next34 = add i64 %indvars.iv33, 1 + %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32 + %exitcond36 = icmp eq i32 %lftr.wideiv35, 100 + br i1 %exitcond36, label %for.cond6.preheader, label %for.body + +for.cond6.preheader: ; preds = %for.body, %for.inc17 + %indvars.iv29 = phi i64 [ %indvars.iv.next30, %for.inc17 ], [ 0, %for.body ] + br label %for.body8 + 
+for.body8: ; preds = %for.inc14, %for.cond6.preheader + %indvars.iv = phi i64 [ 0, %for.cond6.preheader ], [ %indvars.iv.next, %for.inc14 ] + %1 = trunc i64 %indvars.iv to i32 + %2 = trunc i64 %indvars.iv29 to i32 + %cmp9 = icmp eq i32 %1, %2 + br i1 %cmp9, label %for.inc14, label %if.then + +if.then: ; preds = %for.body8 + %3 = add i64 %indvars.iv, %indvars.iv29 + %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv + %4 = trunc i64 %3 to i32 + store i32 %4, i32* %arrayidx13, align 4, !tbaa !0 + br label %for.inc14 + +for.inc14: ; preds = %for.body8, %if.then + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv27 = trunc i64 %indvars.iv.next to i32 + %exitcond28 = icmp eq i32 %lftr.wideiv27, 100 + br i1 %exitcond28, label %for.inc17, label %for.body8 + +for.inc17: ; preds = %for.inc14 + %indvars.iv.next30 = add i64 %indvars.iv29, 1 + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 + %exitcond32 = icmp eq i32 %lftr.wideiv31, 100 + br i1 %exitcond32, label %for.body3.lr.ph.us.i, label %for.cond6.preheader + +for.inc6.us.i: ; preds = %for.body3.us.i + %indvars.iv.next17.i = add i64 %indvars.iv16.i, 1 + %lftr.wideiv24 = trunc i64 %indvars.iv.next17.i to i32 + %exitcond25 = icmp eq i32 %lftr.wideiv24, 100 + br i1 %exitcond25, label %SumArray.exit, label %for.body3.lr.ph.us.i + +for.body3.us.i: ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i + %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ] + %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ] + %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i + %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0 + %add.us.i = add nsw i32 %5, %Result.111.us.i + %indvars.iv.next.i = add i64 %indvars.iv.i, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 100 + br i1 
%exitcond, label %for.inc6.us.i, label %for.body3.us.i + +for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc6.us.i + %indvars.iv16.i = phi i64 [ %indvars.iv.next17.i, %for.inc6.us.i ], [ 0, %for.inc17 ] + %Result.014.us.i = phi i32 [ %add.us.i, %for.inc6.us.i ], [ 0, %for.inc17 ] + br label %for.body3.us.i + +SumArray.exit: ; preds = %for.inc6.us.i + %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind + ret i32 0 + +; CHECK: @main +; CHECK: mtctr +; CHECK: bdnz +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/darwin-labels.ll b/test/CodeGen/PowerPC/darwin-labels.ll index af23369740..56f7782138 100644 --- a/test/CodeGen/PowerPC/darwin-labels.ll +++ b/test/CodeGen/PowerPC/darwin-labels.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep {foo bar":} +; RUN: llc < %s | grep 'foo bar":' target datalayout = "E-p:32:32" target triple = "powerpc-apple-darwin8.2.0" diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll index 6ef740f835..156e00b4e5 100644 --- a/test/CodeGen/PowerPC/fabs.ll +++ b/test/CodeGen/PowerPC/fabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep {fabs f1, f1} +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1" define double @fabs(double %f) { entry: diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll index 815c72c1f8..27496f7937 100644 --- a/test/CodeGen/PowerPC/fma.ll +++ b/test/CodeGen/PowerPC/fma.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=ppc32 | \ -; RUN: egrep {fn?madd|fn?msub} | count 8 +; RUN: llc < %s -march=ppc32 -fp-contract=fast | \ +; RUN: egrep "fn?madd|fn?msub" | count 8 define double @test_FMADD1(double %A, double %B, double %C) { %D = fmul double %A, %B ; 
<double> [#uses=1] diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll index 74a8725eb1..bf8c4a22c9 100644 --- a/test/CodeGen/PowerPC/fsqrt.ll +++ b/test/CodeGen/PowerPC/fsqrt.ll @@ -2,13 +2,13 @@ ; otherwise. ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \ -; RUN: grep {fsqrt f1, f1} +; RUN: grep "fsqrt f1, f1" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ -; RUN: grep {fsqrt f1, f1} +; RUN: grep "fsqrt f1, f1" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \ -; RUN: not grep {fsqrt f1, f1} +; RUN: not grep "fsqrt f1, f1" ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \ -; RUN: not grep {fsqrt f1, f1} +; RUN: not grep "fsqrt f1, f1" declare double @llvm.sqrt.f64(double) diff --git a/test/CodeGen/PowerPC/iabs.ll b/test/CodeGen/PowerPC/iabs.ll index a43f09c7d5..7d089bbd65 100644 --- a/test/CodeGen/PowerPC/iabs.ll +++ b/test/CodeGen/PowerPC/iabs.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=ppc32 -stats |& \ -; RUN: grep {4 .*Number of machine instrs printed} +; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ +; RUN: grep "4 .*Number of machine instrs printed" ;; Integer absolute value, should produce something as good as: ;; srawi r2, r3, 31 diff --git a/test/CodeGen/PowerPC/isel.ll b/test/CodeGen/PowerPC/isel.ll new file mode 100644 index 0000000000..ed494c57d3 --- /dev/null +++ b/test/CodeGen/PowerPC/isel.ll @@ -0,0 +1,23 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc -mcpu=a2 < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s + +define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %p = icmp uge i64 %a, %b + %x = select i1 %p, i64 %c, i64 %d + ret i64 %x +; CHECK: @test1 +; CHECK: isel +} + +define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: + %p = icmp uge i32 %a, %b + %x = 
select i1 %p, i32 %c, i32 %d + ret i32 %x +; CHECK: @test2 +; CHECK: isel +} + diff --git a/test/CodeGen/PowerPC/ispositive.ll b/test/CodeGen/PowerPC/ispositive.ll index 4161e3438a..78cdf4a4d9 100644 --- a/test/CodeGen/PowerPC/ispositive.ll +++ b/test/CodeGen/PowerPC/ispositive.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ -; RUN: grep {srwi r3, r3, 31} +; RUN: grep "srwi r3, r3, 31" define i32 @test1(i32 %X) { entry: diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll new file mode 100644 index 0000000000..5725c0dddf --- /dev/null +++ b/test/CodeGen/PowerPC/lbzux.ll @@ -0,0 +1,49 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s | FileCheck %s + +define fastcc void @allocateSpace() nounwind { +entry: + %0 = load i8** undef, align 8, !tbaa !0 + br i1 undef, label %return, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + br i1 undef, label %if.end7, label %return + +if.end7: ; preds = %lor.lhs.false + br i1 undef, label %if.then15, label %if.end71 + +if.then15: ; preds = %if.end7 + br label %while.cond + +while.cond: ; preds = %while.body, %if.then15 + %idxprom17 = sext i32 0 to i64 + %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17 + %or = or i32 undef, undef + br i1 false, label %if.end71, label %while.body + +while.body: ; preds = %while.cond + br i1 undef, label %while.cond, label %if.then45 + +if.then45: ; preds = %while.body + %idxprom48139 = zext i32 %or to i64 + %arrayidx49 = getelementptr inbounds i8* %0, i64 %idxprom48139 + %1 = bitcast i8* %arrayidx49 to i16* + %2 = bitcast i8* %arrayidx18 to i16* + %3 = load i16* %1, align 1 + store i16 %3, i16* %2, align 1 + br label %return + +if.end71: ; preds = %while.cond, %if.end7 + unreachable + +return: ; preds = %if.then45, %lor.lhs.false, %entry + ret void + +; CHECK: @allocateSpace +; CHECK: lbzux 
+} + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll index 94c2526cf5..915595f6db 100644 --- a/test/CodeGen/PowerPC/long-compare.ll +++ b/test/CodeGen/PowerPC/long-compare.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=ppc32 | grep cntlzw ; RUN: llc < %s -march=ppc32 | not grep xori -; RUN: llc < %s -march=ppc32 | not grep {li } -; RUN: llc < %s -march=ppc32 | not grep {mr } +; RUN: llc < %s -march=ppc32 | not grep "li " +; RUN: llc < %s -march=ppc32 | not grep "mr " define i1 @test(i64 %x) { %tmp = icmp ult i64 %x, 4294967296 diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll index f441e42da2..42472c58fe 100644 --- a/test/CodeGen/PowerPC/lsr-postinc-pos.ll +++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -print-lsr-output |& FileCheck %s +; RUN: llc < %s -print-lsr-output 2>&1 | FileCheck %s ; The icmp is a post-inc use, and the increment is in %bb11, but the ; scevgep needs to be inserted in %bb so that it is dominated by %t. 
diff --git a/test/CodeGen/PowerPC/no-dead-strip.ll b/test/CodeGen/PowerPC/no-dead-strip.ll index 3459413253..6320e2812c 100644 --- a/test/CodeGen/PowerPC/no-dead-strip.ll +++ b/test/CodeGen/PowerPC/no-dead-strip.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep {no_dead_strip.*_X} +; RUN: llc < %s | grep "no_dead_strip.*_X" target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "powerpc-apple-darwin8.8.0" diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll index 1fad2fa3aa..77b726c5ae 100644 --- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll +++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mcpu=440 -fp-contract=fast | FileCheck %s %0 = type { double, double } diff --git a/test/CodeGen/PowerPC/retaddr.ll b/test/CodeGen/PowerPC/retaddr.ll index cf16b4c26f..c931dfe935 100644 --- a/test/CodeGen/PowerPC/retaddr.ll +++ b/test/CodeGen/PowerPC/retaddr.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=ppc32 | grep mflr ; RUN: llc < %s -march=ppc32 | grep lwz -; RUN: llc < %s -march=ppc64 | grep {ld r., 16(r1)} +; RUN: llc < %s -march=ppc64 | grep "ld r., 16(r1)" target triple = "powerpc-apple-darwin8" diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll index 6410c63234..3f90008c00 100644 --- a/test/CodeGen/PowerPC/rlwimi-commute.ll +++ b/test/CodeGen/PowerPC/rlwimi-commute.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 | grep rlwimi -; RUN: llc < %s -march=ppc32 | not grep {or } +; RUN: llc < %s -march=ppc32 | not grep "or " ; Make sure there is no register-register copies here. 
diff --git a/test/CodeGen/PowerPC/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll index 05d37bf162..7efdbe9634 100644 --- a/test/CodeGen/PowerPC/rlwimi3.ll +++ b/test/CodeGen/PowerPC/rlwimi3.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=ppc32 -stats |& \ -; RUN: grep {Number of machine instrs printed} | grep 12 +; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ +; RUN: grep "Number of machine instrs printed" | grep 12 define i16 @Trans16Bit(i32 %srcA, i32 %srcB, i32 %alpha) { %tmp1 = shl i32 %srcA, 15 ; <i32> [#uses=1] diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll index 688b29aa12..731958374e 100644 --- a/test/CodeGen/PowerPC/seteq-0.ll +++ b/test/CodeGen/PowerPC/seteq-0.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \ -; RUN: grep {srwi r., r., 5} +; RUN: grep "srwi r., r., 5" define i32 @eq0(i32 %a) { %tmp.1 = icmp eq i32 %a, 0 ; <i1> [#uses=1] diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll index b4767b0a29..19ca0985ee 100644 --- a/test/CodeGen/PowerPC/small-arguments.ll +++ b/test/CodeGen/PowerPC/small-arguments.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm} +; RUN: llc < %s -march=ppc32 | not grep "extsh\|rlwinm" declare signext i16 @foo() diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll index 2020361250..810630f697 100644 --- a/test/CodeGen/PowerPC/stack-protector.ll +++ b/test/CodeGen/PowerPC/stack-protector.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_guard} -; RUN: llc -march=ppc32 < %s -o - | grep {__stack_chk_fail} +; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_guard" +; RUN: llc -march=ppc32 < %s -o - | grep "__stack_chk_fail" @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] diff --git a/test/CodeGen/PowerPC/stwu-gta.ll b/test/CodeGen/PowerPC/stwu-gta.ll new file mode 100644 index 0000000000..4febe7e2fe --- 
/dev/null +++ b/test/CodeGen/PowerPC/stwu-gta.ll @@ -0,0 +1,22 @@ +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" +target triple = "powerpc-unknown-linux" +; RUN: llc < %s | FileCheck %s + +%class.Two.0.5 = type { i32, i32, i32 } + +@foo = external global %class.Two.0.5, align 4 + +define void @_GLOBAL__I_a() nounwind section ".text.startup" { +entry: + store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4, !tbaa !0 + store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4, !tbaa !0 + ret void +} + +; CHECK: @_GLOBAL__I_a +; CHECK-NOT: stwux +; CHECK: stwu + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/PowerPC/stwux.ll b/test/CodeGen/PowerPC/stwux.ll new file mode 100644 index 0000000000..737e9d9f0e --- /dev/null +++ b/test/CodeGen/PowerPC/stwux.ll @@ -0,0 +1,47 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s | FileCheck %s + +@multvec_i = external unnamed_addr global [100 x i32], align 4 + +define fastcc void @subs_STMultiExceptIntern() nounwind { +entry: + br i1 undef, label %while.body.lr.ph, label %return + +while.body.lr.ph: ; preds = %entry + br label %while.body + +while.body: ; preds = %if.end12, %while.body.lr.ph + %i.0240 = phi i32 [ -1, %while.body.lr.ph ], [ %i.1, %if.end12 ] + br i1 undef, label %if.end12, label %if.then + +if.then: ; preds = %while.body + br label %if.end12 + +if.end12: ; preds = %if.then, %while.body + %i.1 = phi i32 [ %i.0240, %while.body ], [ undef, %if.then ] + br i1 undef, label %while.body, label %while.end + +while.end: ; preds = %if.end12 + br i1 undef, label %return, label %if.end15 + +if.end15: ; preds = %while.end + 
%idxprom.i.i230 = sext i32 %i.1 to i64 + %arrayidx18 = getelementptr inbounds [100 x i32]* @multvec_i, i64 0, i64 %idxprom.i.i230 + store i32 0, i32* %arrayidx18, align 4 + br i1 undef, label %while.body21, label %while.end90 + +while.body21: ; preds = %if.end15 + unreachable + +while.end90: ; preds = %if.end15 + store i32 0, i32* %arrayidx18, align 4 + br label %return + +return: ; preds = %while.end90, %while.end, %entry + ret void + +; CHECK: @subs_STMultiExceptIntern +; CHECK: stwux +} + diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll index 91b201146b..3ea46f50e0 100644 --- a/test/CodeGen/PowerPC/trampoline.ll +++ b/test/CodeGen/PowerPC/trampoline.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | grep {__trampoline_setup} +; RUN: llc < %s -march=ppc32 | grep "__trampoline_setup" module asm "\09.lazy_reference .objc_class_name_NSImageRep" module asm "\09.objc_class_name_NSBitmapImageRep=0" diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll index f73f93d919..18e11baf44 100644 --- a/test/CodeGen/Thumb/asmprinter-bug.ll +++ b/test/CodeGen/Thumb/asmprinter-bug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep {#0} +; RUN: llc < %s -mtriple=thumbv6-apple-darwin10 | grep rsbs | grep "#0" %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } %struct.__sFILEX = type opaque diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll index 0cac7554be..6cc4dd12f6 100644 --- a/test/CodeGen/Thumb/frame_thumb.ll +++ b/test/CodeGen/Thumb/frame_thumb.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=thumb-apple-darwin \ -; RUN: -disable-fp-elim | not grep {r11} +; RUN: -disable-fp-elim | not grep "r11" ; RUN: llc < %s -mtriple=thumb-linux-gnueabi \ -; RUN: 
-disable-fp-elim | not grep {r11} +; RUN: -disable-fp-elim | not grep "r11" define i32 @f() { entry: diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll index d03b5b2e3b..2e77660c45 100644 --- a/test/CodeGen/Thumb/iabs.ll +++ b/test/CodeGen/Thumb/iabs.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=thumb -stats |& \ -; RUN: grep {4 .*Number of machine instrs printed} +; RUN: llc < %s -march=thumb -stats 2>&1 | \ +; RUN: grep "4 .*Number of machine instrs printed" ;; Integer absolute value, should produce something as good as: ;; Thumb: diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll index af7d716446..348e9d3f20 100644 --- a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll +++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll @@ -1,4 +1,4 @@ -; RUN: llc -relocation-model=pic < %s | grep {:$} | sort | uniq -d | count 0 +; RUN: llc -relocation-model=pic < %s | grep ":$" | sort | uniq -d | count 0 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" target triple = "thumbv7-apple-darwin10" diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll index d1af4ba47f..2178eecb43 100644 --- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll +++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ -; RUN: grep {ldr.*\\\[.*\],} | count 1 +; RUN: grep "ldr.*\[.*\]," | count 1 define i32 @test(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b ; <i32> [#uses=2] diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll index 9cc3f4a2ed..601c0b5608 100644 --- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll +++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ -; RUN: grep {ldr.*\\!} | count 3 +; RUN: grep "ldr.*\!" 
| count 3 ; RUN: llc < %s -march=thumb -mattr=+thumb2 | \ -; RUN: grep {ldrsb.*\\!} | count 1 +; RUN: grep "ldrsb.*\!" | count 1 define i32* @test1(i32* %X, i32* %dest) { %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Thumb2/thumb2-rev16.ll b/test/CodeGen/Thumb2/thumb2-rev16.ll index 39b6ac3f00..10cd5391a4 100644 --- a/test/CodeGen/Thumb2/thumb2-rev16.ll +++ b/test/CodeGen/Thumb2/thumb2-rev16.ll @@ -1,7 +1,7 @@ ; XFAIL: * ; fixme rev16 pattern is not matching -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rev16\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1 ; 0xff00ff00 = 4278255360 ; 0x00ff00ff = 16711935 diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll index 590c333b3d..5ad92cd729 100644 --- a/test/CodeGen/Thumb2/thumb2-ror.ll +++ b/test/CodeGen/Thumb2/thumb2-ror.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s - +; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1 ; CHECK: f1: ; CHECK: ror.w r0, r0, #22 @@ -13,6 +13,8 @@ define i32 @f1(i32 %a) { ; CHECK: f2: ; CHECK-NOT: and ; CHECK: ror +; THUMB1: f2 +; THUMB1: and define i32 @f2(i32 %v, i32 %nbits) { entry: %and = and i32 %nbits, 31 @@ -21,4 +23,4 @@ entry: %shl = shl i32 %v, %sub %or = or i32 %shl, %shr ret i32 %or -}
\ No newline at end of file +} diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll index 1e555571c0..d91e3b32f9 100644 --- a/test/CodeGen/Thumb2/tls1.ll +++ b/test/CodeGen/Thumb2/tls1.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \ -; RUN: grep {i(tpoff)} +; RUN: grep "i(tpoff)" ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \ -; RUN: grep {__aeabi_read_tp} +; RUN: grep "__aeabi_read_tp" ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \ -; RUN: -relocation-model=pic | grep {__tls_get_addr} +; RUN: -relocation-model=pic | grep "__tls_get_addr" @i = thread_local global i32 15 ; <i32*> [#uses=2] diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll index 24848602ba..0af2445d7f 100644 --- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll +++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll @@ -3,7 +3,7 @@ ; it makes a ton of annoying overlapping live ranges. This code should not ; cause spills! 
; -; RUN: llc < %s -march=x86 -stats |& not grep spilled +; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled target datalayout = "e-p:32:32" diff --git a/test/CodeGen/X86/2003-11-03-GlobalBool.ll b/test/CodeGen/X86/2003-11-03-GlobalBool.ll index 8b0a18550d..f201b981a8 100644 --- a/test/CodeGen/X86/2003-11-03-GlobalBool.ll +++ b/test/CodeGen/X86/2003-11-03-GlobalBool.ll @@ -1,4 +1,4 @@ ; RUN: llc < %s -march=x86 | \ -; RUN: not grep {.byte\[\[:space:\]\]*true} +; RUN: not grep ".byte[[:space:]]*true" @X = global i1 true ; <i1*> [#uses=0] diff --git a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll index fea2b54d76..dde210b776 100644 --- a/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll +++ b/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | grep {(%esp} -; RUN: llc < %s -march=x86 | grep {pushl %ebp} | count 1 -; RUN: llc < %s -march=x86 | grep {popl %ebp} | count 1 +; RUN: llc < %s -march=x86 | grep "(%esp" +; RUN: llc < %s -march=x86 | grep "pushl %ebp" | count 1 +; RUN: llc < %s -march=x86 | grep "popl %ebp" | count 1 declare i8* @llvm.returnaddress(i32) diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll index c44d10ac5b..526b0b206a 100644 --- a/test/CodeGen/X86/2004-03-30-Select-Max.ll +++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {j\[lgbe\]} +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep "j[lgbe]" define i32 @max(i32 %A, i32 %B) nounwind { %gt = icmp sgt i32 %A, %B ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll index dc69ef8310..f8bf0991fb 100644 --- a/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll +++ b/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | not grep {subl.*%esp} +; RUN: llc < %s -march=x86 | not grep 
"subl.*%esp" define i32 @f(i32 %a, i32 %b) { %tmp.2 = mul i32 %a, %a ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll index 0421896922..1a3d74918d 100644 --- a/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll +++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& \ +; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 7 define i32 @g(i32 %a, i32 %b) nounwind { diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll index 8783a11c06..fb1262a372 100644 --- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll +++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 -relocation-model=static > %t -; RUN: grep {movl _last} %t | count 1 -; RUN: grep {cmpl.*_last} %t | count 1 +; RUN: grep "movl _last" %t | count 1 +; RUN: grep "cmpl.*_last" %t | count 1 @block = external global i8* ; <i8**> [#uses=1] @last = external global i32 ; <i32*> [#uses=3] diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll index b045329966..5cba3efeef 100644 --- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll +++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& \ -; RUN: not grep {Number of register spills} +; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \ +; RUN: not grep "Number of register spills" ; END. 
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 7d0a6ab0a0..1c75f93915 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -relocation-model=static -stats |& \ +; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ ; RUN: grep asm-printer | grep 14 ; @size20 = external global i32 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll index 23954d76a5..95eefa1e71 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& \ +; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 13 define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind { diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll index d58d638562..3419d01fa0 100644 --- a/test/CodeGen/X86/2006-05-08-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -relocation-model=static | not grep {subl.*%esp} +; RUN: llc < %s -march=x86 -relocation-model=static | not grep "subl.*%esp" @A = external global i16* ; <i16**> [#uses=1] @B = external global i32 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index d47840e866..37c510786a 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 35 +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 
-realign-stack=0 2>&1 | \ +; RUN: grep "asm-printer" | grep 35 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll index 3159cec855..c5c74d1048 100644 --- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll +++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll @@ -1,7 +1,7 @@ ; PR850 ; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t -; RUN: grep {movl 4(%eax),%ebp} %t -; RUN: grep {movl 0(%eax), %ebx} %t +; RUN: grep "movl 4(%eax),%ebp" %t +; RUN: grep "movl 0(%eax), %ebx" %t define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) { %tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint $$0x80\0Apop %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i ) ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll index a19d8f7092..56d5f2f304 100644 --- a/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll +++ b/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mcpu=i386 | \ -; RUN: not grep {movl %eax, %edx} +; RUN: not grep "movl %eax, %edx" define i32 @foo(i32 %t, i32 %C) { entry: diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll index affb7afb1c..adc825c039 100644 --- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 > %t ; RUN: grep movb %t | count 2 -; RUN: grep {movzb\[wl\]} %t +; RUN: grep "movzb[wl]" %t define void @handle_vector_size_attribute() nounwind { diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll 
b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll index a228898636..04d4b8ee57 100644 --- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll +++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 > %t -; RUN: not grep {,%rsp)} %t +; RUN: not grep ",%rsp)" %t ; PR1103 target datalayout = "e-p:64:64" diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll index 3312e01b3d..3b2e443d7d 100644 --- a/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll +++ b/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {mov %gs:72, %eax} +; RUN: llc < %s -march=x86 | grep "mov %gs:72, %eax" target datalayout = "e-p:32:32" target triple = "i686-apple-darwin9" diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll index c1b1ad1c73..18b06dc085 100644 --- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll +++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mcpu=yonah -march=x86 | \ -; RUN: grep {cmpltsd %xmm0, %xmm0} +; RUN: grep "cmpltsd %xmm0, %xmm0" target datalayout = "e-p:32:32" target triple = "i686-apple-darwin9" diff --git a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll index 85a2ecc959..cae68c9f3a 100644 --- a/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll +++ b/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | not grep {bsrl.*10} +; RUN: llc < %s | not grep "bsrl.*10" ; PR1356 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll index deb39998a3..c3d7e8a054 100644 --- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll +++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll @@ -1,4 
+1,4 @@ -; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep {addl .12, %esp} +; RUN: llc < %s -mtriple=i686-pc-linux-gnu -disable-fp-elim | not grep "addl .12, %esp" ; PR1398 %struct.S = type { i32, i32 } diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll index 77291f063b..aa0ee5d074 100644 --- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll +++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {movsbl} +; RUN: llc < %s -march=x86 | grep "movsbl" @X = global i32 0 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll index 5acb05134c..88186cd6fa 100644 --- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll +++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep {lea\[\[:space:\]\]R} +; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep "lea[[:space:]]R" %struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* } %struct.AGenericManager = type <{ i8 }> diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll index 228a915e3e..56a109acfc 100644 --- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll +++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=static | grep {foo str$} +; RUN: llc < %s -relocation-model=static | grep "foo str$" ; PR1761 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-pc-linux" diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll index 2e95082afa..99df20da25 100644 --- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll +++ 
b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2 +; RUN: llc < %s -march=x86 -mcpu=generic | grep "(%esp)" | count 2 ; PR1872 %struct.c34007g__designated___XUB = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll index bdacf50711..a1b973d7cc 100644 --- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll +++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16 +; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 ; PR1909 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1] diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll index 5115e48365..a52b36588a 100644 --- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll +++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s | grep {a:} | not grep ax -; RUN: llc < %s | grep {b:} | not grep ax +; RUN: llc < %s | grep "a:" | not grep ax +; RUN: llc < %s | grep "b:" | not grep ax ; PR2078 ; The clobber list says that "ax" is clobbered. Make sure that eax isn't ; allocated to the input/output register. 
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll index 4dc3a10f46..5ca7e3ed3d 100644 --- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll +++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep {#} | not grep -v {##} +; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose | grep "#" | not grep -v "##" %struct.AGenericCall = type { %struct.AGenericManager*, %struct.ComponentParameters*, i32* } %struct.AGenericManager = type <{ i8 }> diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll index 109069e353..3a1de11ea2 100644 --- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll +++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep {, %e} +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-cgp-branch-opts | grep movw | not grep ", %e" %struct.DBC_t = type { i32, i8*, i16, %struct.DBC_t*, i8*, i8*, i8*, i8*, i8*, %struct.DBC_t*, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16, i32*, i8, i16, %struct.DRVOPT*, i16 } %struct.DRVOPT = type { i16, i32, i8, %struct.DRVOPT* } diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll index 859041eb81..f244793e7a 100644 --- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep {%e} +; RUN: llc < %s -mtriple=i386-apple-darwin | grep xorl | grep "%e" ; Make sure xorl operands are 32-bit registers. 
%struct.tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll index 5b97eb71cb..7c04206de7 100644 --- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl > %t -; RUN: not grep {r\[abcd\]x} %t -; RUN: not grep {r\[ds\]i} %t -; RUN: not grep {r\[bs\]p} %t +; RUN: not grep "r[abcd]x" %t +; RUN: not grep "r[ds]i" %t +; RUN: not grep "r[bs]p" %t %struct.BITMAP = type { i16, i16, i32, i32, i32, i32, i32, i32, i8*, i8* } %struct.BltData = type { float, float, float, float } diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll index 99cb8569b3..bdac8fd484 100644 --- a/test/CodeGen/X86/2008-08-06-CmpStride.ll +++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %} +; RUN: llc -march=x86-64 < %s -o - | grep "cmpl \$[1], %" @.str = internal constant [4 x i8] c"%d\0A\00" diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll index 1d27fc53ea..c63c890add 100644 --- a/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll +++ b/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll @@ -1,15 +1,36 @@ ; Check that eh_return & unwind_init were properly lowered -; RUN: llc < %s | grep %ebp | count 9 -; RUN: llc < %s | grep %ecx | count 5 +; RUN: llc < %s -verify-machineinstrs | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i386-pc-linux" -define i8* @test(i32 %a, i8* %b) { +; CHECK: test1 +; CHECK: pushl %ebp +define i8* @test1(i32 %a, i8* %b) { entry: call void @llvm.eh.unwind.init() %foo = alloca i32 call void @llvm.eh.return.i32(i32 %a, i8* %b) +; CHECK: movl 12(%ebp), %[[ECX:e..]] +; CHECK: movl 8(%ebp), 
%[[EAX:e..]] +; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]]) +; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]] +; CHECK: movl %[[ECX2]], %esp +; CHECK: ret + unreachable +} + +; CHECK: test2 +; CHECK: pushl %ebp +define i8* @test2(i32 %a, i8* %b) { +entry: + call void @llvm.eh.return.i32(i32 %a, i8* %b) +; CHECK: movl 12(%ebp), %[[ECX:e..]] +; CHECK: movl 8(%ebp), %[[EAX:e..]] +; CHECK: movl %[[ECX]], 4(%ebp,%[[EAX]]) +; CHECK: leal 4(%ebp,%[[EAX]]), %[[ECX2:e..]] +; CHECK: movl %[[ECX2]], %esp +; CHECK: ret unreachable } diff --git a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll index 421b931ecd..e504bc3e77 100644 --- a/test/CodeGen/X86/2008-10-24-FlippedCompare.ll +++ b/test/CodeGen/X86/2008-10-24-FlippedCompare.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep {ucomiss\[^,\]*esp} +; RUN: llc < %s -march=x86 -mattr=+sse2 -o - | not grep "ucomiss[^,]*esp" define void @f(float %wt) { entry: diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index 9d144a4be0..66f06778bd 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s ; Now this test spills one register. But a reload in the loop is cheaper than ; the divsd so it's a win. 
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll index 2edcaea80c..0e95c9e34e 100644 --- a/test/CodeGen/X86/2008-12-23-crazy-address.ll +++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -relocation-model=static | grep {lea.*X.*esp} | count 2 +; RUN: llc < %s -march=x86 -relocation-model=static | grep "lea.*X.*esp" | count 2 @X = external global [0 x i32] diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll index 3e425536d1..b478f27a95 100644 --- a/test/CodeGen/X86/2009-01-31-BigShift2.ll +++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {mov.*56} +; RUN: llc < %s -march=x86 | grep "mov.*56" ; PR3449 define void @test(<8 x double>* %P, i64* %Q) nounwind { diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll index 7ea699833b..9cbf350940 100644 --- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll +++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep commuted +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted ; rdar://6608609 define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 8ac2b4e051..1b2f20303b 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats |& grep {8 machine-licm} +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "8 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll 
b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll index 3564f01a7c..847a43fb06 100644 --- a/test/CodeGen/X86/2009-03-12-CPAlignBug.ll +++ b/test/CodeGen/X86/2009-03-12-CPAlignBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space} +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | not grep ".space" ; rdar://6668548 declare double @llvm.sqrt.f64(double) nounwind readonly diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index 8bbdb0e82f..d934ec9a88 100644 --- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t -; RUN: not grep {%rsp} %t -; RUN: not grep {%rbp} %t +; RUN: not grep "%rsp" %t +; RUN: not grep "%rbp" %t ; The register-pressure scheduler should be able to schedule this in a ; way that does not require spills. 
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll index f46eed4769..ad18a0c5b9 100644 --- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll +++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of modref unfolded} +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded" ; XFAIL: * ; 69408 removed the opportunity for this optimization to work diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll index 5f5bf06a2f..08bf9e3f9f 100644 --- a/test/CodeGen/X86/2009-04-24.ll +++ b/test/CodeGen/X86/2009-04-24.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic > %t2 -; RUN: grep {leaq.*TLSGD} %t2 -; RUN: grep {__tls_get_addr} %t2 +; RUN: grep "leaq.*TLSGD" %t2 +; RUN: grep "__tls_get_addr" %t2 ; PR4004 @i = thread_local global i32 15 diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll index a2fd2e4c51..a6ed74ba2e 100644 --- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll +++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep {movl.*%ebx, 8(%esi)} +; RUN: llc < %s | grep "movl.*%ebx, 8(%esi)" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.0" diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll index af552d4ce2..fe04272082 100644 --- a/test/CodeGen/X86/2009-05-30-ISelBug.ll +++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | not grep {movzbl %\[abcd\]h,} +; RUN: llc < %s -march=x86-64 | not grep 
"movzbl %[abcd]h," define void @BZ2_bzDecompress_bb5_2E_outer_bb35_2E_i_bb54_2E_i(i32*, i32 %c_nblock_used.2.i, i32 %.reload51, i32* %.out, i32* %.out1, i32* %.out2, i32* %.out3) nounwind { newFuncRoot: diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll index de930d5126..b8effa6773 100644 --- a/test/CodeGen/X86/20090313-signext.ll +++ b/test/CodeGen/X86/20090313-signext.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86-64 -relocation-model=pic > %t -; RUN: grep {movswl %ax, %edi} %t -; RUN: grep {movw (%rax), %ax} %t +; RUN: grep "movswl %ax, %edi" %t +; RUN: grep "movw (%rax), %ax" %t ; XFAIL: * @x = common global i16 0 diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll index cd8960b9ed..eb4a5c04a2 100644 --- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll +++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats |& not grep ext-opt +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { entry: diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll index a51dad0303..47ef693cc2 100644 --- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll +++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats |& FileCheck %s +; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s ; ; This test should not cause any spilling with RAFast. 
; diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll index 101eccabbd..18a3313773 100644 --- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll +++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \ -; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc} +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \ +; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc" ; rdar://11095580 diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll index 3758fd8ce5..33141680aa 100644 --- a/test/CodeGen/X86/MachineSink-PHIUse.ll +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats |& grep {machine-sink} +; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink" define fastcc void @t() nounwind ssp { entry: diff --git a/test/CodeGen/X86/addr-label-difference.ll b/test/CodeGen/X86/addr-label-difference.ll index 49abd8a92e..15fbec52e2 100644 --- a/test/CodeGen/X86/addr-label-difference.ll +++ b/test/CodeGen/X86/addr-label-difference.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -o - | grep {__TEXT,__const} +; RUN: llc %s -o - | grep "__TEXT,__const" ; PR5929 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0" diff --git a/test/CodeGen/X86/aligned-comm.ll b/test/CodeGen/X86/aligned-comm.ll index 7715869ed9..eab02cc1f9 100644 --- a/test/CodeGen/X86/aligned-comm.ll +++ b/test/CodeGen/X86/aligned-comm.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep {array,16512,7} -; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep {array,16512,7} +; RUN: llc < %s 
-mtriple=i386-apple-darwin10 | grep "array,16512,7" +; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep "array,16512,7" ; Darwin 9+ should get alignment on common symbols. @array = common global [4128 x i32] zeroinitializer, align 128 diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll index a45284e10c..8a8b044d14 100644 --- a/test/CodeGen/X86/alloca-align-rounding-32.ll +++ b/test/CodeGen/X86/alloca-align-rounding-32.ll @@ -15,6 +15,5 @@ define void @foo2(i32 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 -; CHECK: andl $-32, %esp ; CHECK: andl $-32, %eax } diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 3d76fb0aa2..7bc880625c 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -15,6 +15,5 @@ define void @foo2(i64 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 -; CHECK: andq $-32, %rsp ; CHECK: andq $-32, %rax } diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/asm-reg-type-mismatch.ll index f0d46a0252..47accdbc07 100644 --- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll +++ b/test/CodeGen/X86/asm-reg-type-mismatch.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -mcpu=core2 | grep xorps | count 2 -; RUN: llc < %s -mcpu=core2 | not grep movap +; RUN: llc < %s -mcpu=core2 | FileCheck %s ; PR2715 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" @@ -11,8 +10,22 @@ target triple = "x86_64-unknown-linux-gnu" %struct.nsXPTCVariant = type { %struct.nsXPTCMiniVariant, i8*, %struct.nsXPTType, i8 } %struct.nsXPTType = type { %struct.XPTTypeDescriptorPrefix } -define i32 @XPTC_InvokeByIndex(%struct.nsISupports* %that, i32 %methodIndex, i32 %paramCount, %struct.nsXPTCVariant* %params) nounwind { +define i32 @test1(%struct.nsISupports* %that, i32 %methodIndex, i32 
%paramCount, %struct.nsXPTCVariant* %params) nounwind { entry: call void asm sideeffect "", "{xmm0},{xmm1},{xmm2},{xmm3},{xmm4},{xmm5},{xmm6},{xmm7},~{dirflag},~{fpsr},~{flags}"( double undef, double undef, double undef, double 1.0, double undef, double 0.0, double undef, double 0.0 ) nounwind ret i32 0 + ; CHECK: test1 + ; CHECK-NOT: movap + ; CHECK: xorps + ; CHECK: xorps + ; CHECK-NOT: movap +} + +define i64 @test2() nounwind { +entry: + %0 = tail call i64 asm sideeffect "movq $1, $0", "={xmm7},*m,~{dirflag},~{fpsr},~{flags}"(i64* null) nounwind + ret i64 %0 + ; CHECK: test2 + ; CHECK: movq {{.*}}, %xmm7 + ; CHECK: movd %xmm7, %rax } diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll index 5268ec3a56..e203c4ed02 100755 --- a/test/CodeGen/X86/avx-shuffle-x86_32.ll +++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll @@ -4,5 +4,5 @@ define <4 x i64> @test1(<4 x i64> %a) nounwind { %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ret <4 x i64>%b ; CHECK: test1: - ; CHECK: vinsertf128 + ; CHECK-NOT: vinsertf128 } diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index f1debffd11..9b41709a3b 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -90,8 +90,8 @@ define i32 @test9(<4 x i32> %a) nounwind { ; Extract a value which is the result of an undef mask. 
define i32 @test10(<4 x i32> %a) nounwind { ; CHECK: @test10 -; CHECK-NEXT: # -; CHECK-NEXT: ret +; CHECK-NOT: {{^[^#]*[a-z]}} +; CHECK: ret %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> %r = extractelement <8 x i32> %b, i32 2 ret i32 %r @@ -219,3 +219,32 @@ define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline { %t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef> ret <16 x i16> %t } + +;CHECK: test17 +;CHECK-NOT: vinsertf128 +;CHECK: ret +define <8 x float> @test17(<4 x float> %y) { + %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <8 x float> %x +} + +; CHECK: test18 +; CHECK: vshufps +; CHECK: vshufps +; CHECK: vunpcklps +; CHECK: ret +define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind { + %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + ret <8 x float>%S +} + +; CHECK: test19 +; CHECK: vshufps +; CHECK: vshufps +; CHECK: vunpcklps +; CHECK: ret +define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { + %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x float>%S +} + diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 3f27a0291b..459dbb235a 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -976,3 +976,163 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ret void } declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind + +define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, + <4 x 
i32> %idx, <2 x double> %mask) { + ; CHECK: vgatherdpd + %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, + i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, + <4 x i32>, <2 x double>, i8) nounwind readonly + +define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, + <4 x i32> %idx, <4 x double> %mask) { + ; CHECK: vgatherdpd + %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, + i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ; + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, + <4 x i32>, <4 x double>, i8) nounwind readonly + +define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, + <2 x i64> %idx, <2 x double> %mask) { + ; CHECK: vgatherqpd + %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, + i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ; + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, + <2 x i64>, <2 x double>, i8) nounwind readonly + +define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, + <4 x i64> %idx, <4 x double> %mask) { + ; CHECK: vgatherqpd + %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, + i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ; + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, + <4 x i64>, <4 x double>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, + <4 x i32> %idx, <4 x float> %mask) { + ; CHECK: vgatherdps + %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, + i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, + <4 x i32>, <4 x float>, i8) nounwind readonly + +define <8 x 
float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, + <8 x i32> %idx, <8 x float> %mask) { + ; CHECK: vgatherdps + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ; + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, + <8 x i32>, <8 x float>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, + <2 x i64> %idx, <4 x float> %mask) { + ; CHECK: vgatherqps + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, + i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, + <2 x i64>, <4 x float>, i8) nounwind readonly + +define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, + <4 x i64> %idx, <4 x float> %mask) { + ; CHECK: vgatherqps + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, + i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, + <4 x i64>, <4 x float>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, + <4 x i32> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, + i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, + <4 x i32>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, + <4 x i32> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, + i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, + <4 x i32>, <4 x i64>, i8) nounwind readonly + +define <2 x i64> 
@test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, + <2 x i64> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, + <2 x i64>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, + <4 x i64> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, + i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, + <4 x i64>, <4 x i64>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, + <4 x i32> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherdd + %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, + i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, + <4 x i32>, <4 x i32>, i8) nounwind readonly + +define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, + <8 x i32> %idx, <8 x i32> %mask) { + ; CHECK: vpgatherdd + %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, + i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ; + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, + <8 x i32>, <8 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, + <2 x i64> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, + i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, + <2 x i64>, <4 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, + <4 x i64> %idx, <4 x i32> %mask) { 
+ ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, + i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, + <4 x i64>, <4 x i32>, i8) nounwind readonly diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll index 46b41fa953..b804233663 100644 --- a/test/CodeGen/X86/avx2-vbroadcast.ll +++ b/test/CodeGen/X86/avx2-vbroadcast.ll @@ -259,3 +259,99 @@ define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp { ret <4 x double> %wide } +;CHECK: _inreg8xfloat +;CHECK: vbroadcastss +;CHECK: ret +define <8 x float> @_inreg8xfloat(<8 x float> %a) { + %b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer + ret <8 x float> %b +} + +;CHECK: _inreg4xfloat +;CHECK: vbroadcastss +;CHECK: ret +define <4 x float> @_inreg4xfloat(<4 x float> %a) { + %b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer + ret <4 x float> %b +} + +;CHECK: _inreg16xi16 +;CHECK: vpbroadcastw +;CHECK: ret +define <16 x i16> @_inreg16xi16(<16 x i16> %a) { + %b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer + ret <16 x i16> %b +} + +;CHECK: _inreg8xi16 +;CHECK: vpbroadcastw +;CHECK: ret +define <8 x i16> @_inreg8xi16(<8 x i16> %a) { + %b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %b +} + + +;CHECK: _inreg4xi64 +;CHECK: vpbroadcastq +;CHECK: ret +define <4 x i64> @_inreg4xi64(<4 x i64> %a) { + %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer + ret <4 x i64> %b +} + +;CHECK: _inreg2xi64 +;CHECK: vpbroadcastq +;CHECK: ret +define <2 x i64> @_inreg2xi64(<2 x i64> %a) { + %b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %b +} + +;CHECK: _inreg4xdouble +;CHECK: vbroadcastsd +;CHECK: ret +define <4 x double> @_inreg4xdouble(<4 x double> %a) { + %b = 
shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer + ret <4 x double> %b +} + +;CHECK: _inreg2xdouble +;CHECK: vpbroadcastq +;CHECK: ret +define <2 x double> @_inreg2xdouble(<2 x double> %a) { + %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %b +} + +;CHECK: _inreg8xi32 +;CHECK: vpbroadcastd +;CHECK: ret +define <8 x i32> @_inreg8xi32(<8 x i32> %a) { + %b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer + ret <8 x i32> %b +} + +;CHECK: _inreg4xi32 +;CHECK: vpbroadcastd +;CHECK: ret +define <4 x i32> @_inreg4xi32(<4 x i32> %a) { + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer + ret <4 x i32> %b +} + +;CHECK: _inreg32xi8 +;CHECK: vpbroadcastb +;CHECK: ret +define <32 x i8> @_inreg32xi8(<32 x i8> %a) { + %b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer + ret <32 x i8> %b +} + +;CHECK: _inreg16xi8 +;CHECK: vpbroadcastb +;CHECK: ret +define <16 x i8> @_inreg16xi8(<16 x i8> %a) { + %b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer + ret <16 x i8> %b +} diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll index cf774591d8..c94261467c 100644 --- a/test/CodeGen/X86/break-anti-dependencies.ll +++ b/test/CodeGen/X86/break-anti-dependencies.ll @@ -2,11 +2,11 @@ ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. 
; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t -; RUN: grep {%xmm0} %t | count 14 -; RUN: not grep {%xmm1} %t +; RUN: grep "%xmm0" %t | count 14 +; RUN: not grep "%xmm1" %t ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t -; RUN: grep {%xmm0} %t | count 7 -; RUN: grep {%xmm1} %t | count 7 +; RUN: grep "%xmm0" %t | count 7 +; RUN: grep "%xmm1" %t | count 7 define void @goo(double* %r, double* %p, double* %q) nounwind { entry: diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll index 3857fb1579..38cda4d140 100644 --- a/test/CodeGen/X86/call-imm.ll +++ b/test/CodeGen/X86/call-imm.ll @@ -1,11 +1,11 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep {call.*12345678} -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep {call.*12345678} -; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep {call.*12345678} +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep "call.*12345678" +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep "call.*12345678" +; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep "call.*12345678" ; Call to immediate is not safe on x86-64 unless we *know* that the ; call will be within 32-bits pcrel from the dest immediate. 
-; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax} +; RUN: llc < %s -march=x86-64 | grep "call.*\*%rax" ; PR3666 ; PR3773 diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll index a5848763c9..4004379938 100644 --- a/test/CodeGen/X86/coalesce-esp.ll +++ b/test/CodeGen/X86/coalesce-esp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep {movl %esp, %ebp} +; RUN: llc < %s | grep "movl %esp, %ebp" ; PR4572 ; Don't coalesce with %esp if it would end up putting %esp in diff --git a/test/CodeGen/X86/constructor.ll b/test/CodeGen/X86/constructor.ll new file mode 100644 index 0000000000..b57889643e --- /dev/null +++ b/test/CodeGen/X86/constructor.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=CTOR %s +; RUN: llc -mtriple x86_64-pc-linux -use-init-array < %s | FileCheck --check-prefix=INIT-ARRAY %s +@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }, { i32, void ()* } { i32 15, void ()* @g }] + +define void @f() { +entry: + ret void +} + +define void @g() { +entry: + ret void +} + +; CTOR: .section .ctors.65520,"aw",@progbits +; CTOR-NEXT: .align 8 +; CTOR-NEXT: .quad g +; CTOR-NEXT: .section .ctors,"aw",@progbits +; CTOR-NEXT: .align 8 +; CTOR-NEXT: .quad f + +; INIT-ARRAY: .section .init_array.15,"aw",@init_array +; INIT-ARRAY-NEXT: .align 8 +; INIT-ARRAY-NEXT: .quad g +; INIT-ARRAY-NEXT: .section .init_array,"aw",@init_array +; INIT-ARRAY-NEXT: .align 8 +; INIT-ARRAY-NEXT: .quad f diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll index b82348b32e..064ee364d1 100644 --- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll +++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats |& FileCheck %s -check-prefix=STATS -; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats |& FileCheck %s -check-prefix=STATS +; RUN: llc < %s 
-mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS +; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; STATS: 9 asm-printer ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index cf6e27d159..c71c6ec81d 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=x86 %s -o - -; RUN: llc -march=x86-64 %s -o - +; RUN: llc -march=x86 < %s -verify-machineinstrs +; RUN: llc -march=x86-64 < %s -verify-machineinstrs ; PR6497 @@ -391,3 +391,38 @@ if.end: %t11 = tail call i64 asm sideeffect "foo", "=*m,=A,{bx},{cx},1,~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %t6, i32 0, i32 0, i64 0) nounwind ret void } + +; Avoid emitting wrong kill flags from InstrEmitter. +; InstrEmitter::EmitSubregNode() may steal virtual registers from already +; emitted blocks when isCoalescableExtInstr points out the opportunity. +; Make sure kill flags are cleared on the newly global virtual register. 
+define i64 @ov_read(i8* %vf, i8* nocapture %buffer, i32 %length, i32 %bigendianp, i32 %word, i32 %sgned, i32* %bitstream) nounwind uwtable ssp { +entry: + br i1 undef, label %return, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br i1 undef, label %if.then3, label %if.end7 + +if.then3: ; preds = %while.body.preheader + %0 = load i32* undef, align 4 + br i1 undef, label %land.lhs.true.i255, label %if.end7 + +land.lhs.true.i255: ; preds = %if.then3 + br i1 undef, label %if.then.i256, label %if.end7 + +if.then.i256: ; preds = %land.lhs.true.i255 + %sub.i = sub i32 0, %0 + %conv = sext i32 %sub.i to i64 + br i1 undef, label %if.end7, label %while.end + +if.end7: ; preds = %if.then.i256, %land.lhs.true.i255, %if.then3, %while.body.preheader + unreachable + +while.end: ; preds = %if.then.i256 + %cmp18 = icmp sgt i32 %sub.i, 0 + %.conv = select i1 %cmp18, i64 -131, i64 %conv + ret i64 %.conv + +return: ; preds = %entry + ret i64 -131 +} diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll index c3c7990d19..af69531246 100644 --- a/test/CodeGen/X86/dagcombine-cse.ll +++ b/test/CodeGen/X86/dagcombine-cse.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 14 +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { entry: diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll deleted file mode 100644 index b787ee87c5..0000000000 --- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll +++ /dev/null @@ -1,158 +0,0 @@ -; RUN: llc < %s -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -; rdar://11496434 - -; no VLAs or dynamic alignment -define i32 @t1() nounwind uwtable ssp { -entry: - %a = alloca i32, align 4 - call void @t1_helper(i32* %a) nounwind - %0 = load 
i32* %a, align 4 - %add = add nsw i32 %0, 13 - ret i32 %add - -; CHECK: _t1 -; CHECK-NOT: andq $-{{[0-9]+}}, %rsp -; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi -; CHECK: callq _t1_helper -; CHECK: movl [[OFFSET]](%rsp), %eax -; CHECK: addl $13, %eax -} - -declare void @t1_helper(i32*) - -; dynamic realignment -define i32 @t2() nounwind uwtable ssp { -entry: - %a = alloca i32, align 4 - %v = alloca <8 x float>, align 32 - call void @t2_helper(i32* %a, <8 x float>* %v) nounwind - %0 = load i32* %a, align 4 - %add = add nsw i32 %0, 13 - ret i32 %add - -; CHECK: _t2 -; CHECK: pushq %rbp -; CHECK: movq %rsp, %rbp -; CHECK: andq $-32, %rsp -; CHECK: subq ${{[0-9]+}}, %rsp -; -; CHECK: leaq {{[0-9]*}}(%rsp), %rdi -; CHECK: leaq {{[0-9]*}}(%rsp), %rsi -; CHECK: callq _t2_helper -; -; CHECK: movq %rbp, %rsp -; CHECK: popq %rbp -} - -declare void @t2_helper(i32*, <8 x float>*) - -; VLAs -define i32 @t3(i64 %sz) nounwind uwtable ssp { -entry: - %a = alloca i32, align 4 - %vla = alloca i32, i64 %sz, align 16 - call void @t3_helper(i32* %a, i32* %vla) nounwind - %0 = load i32* %a, align 4 - %add = add nsw i32 %0, 13 - ret i32 %add - -; CHECK: _t3 -; CHECK: pushq %rbp -; CHECK: movq %rsp, %rbp -; CHECK: pushq %rbx -; CHECK-NOT: andq $-{{[0-9]+}}, %rsp -; CHECK: subq ${{[0-9]+}}, %rsp -; -; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp -; CHECK: popq %rbx -; CHECK: popq %rbp -} - -declare void @t3_helper(i32*, i32*) - -; VLAs + Dynamic realignment -define i32 @t4(i64 %sz) nounwind uwtable ssp { -entry: - %a = alloca i32, align 4 - %v = alloca <8 x float>, align 32 - %vla = alloca i32, i64 %sz, align 16 - call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind - %0 = load i32* %a, align 4 - %add = add nsw i32 %0, 13 - ret i32 %add - -; CHECK: _t4 -; CHECK: pushq %rbp -; CHECK: movq %rsp, %rbp -; CHECK: andq $-32, %rsp -; CHECK: pushq %r14 -; CHECK: pushq %rbx -; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp -; CHECK: movq %rsp, %rbx -; -; CHECK: leaq {{[0-9]*}}(%rbx), %rdi -; CHECK: leaq 
{{[0-9]*}}(%rbx), %rdx -; CHECK: callq _t4_helper -; -; CHECK: addq $[[STACKADJ]], %rsp -; CHECK: popq %rbx -; CHECK: popq %r14 -; CHECK: movq %rbp, %rsp -; CHECK: popq %rbp -} - -declare void @t4_helper(i32*, i32*, <8 x float>*) - -; Dynamic realignment + Spill -define i32 @t5(float* nocapture %f) nounwind uwtable ssp { -entry: - %a = alloca i32, align 4 - %0 = bitcast float* %f to <8 x float>* - %1 = load <8 x float>* %0, align 32 - call void @t5_helper1(i32* %a) nounwind - call void @t5_helper2(<8 x float> %1) nounwind - %2 = load i32* %a, align 4 - %add = add nsw i32 %2, 13 - ret i32 %add - -; CHECK: _t5 -; CHECK: pushq %rbp -; CHECK: movq %rsp, %rbp -; CHECK: andq $-32, %rsp -; CHECK: subq ${{[0-9]+}}, %rsp -; -; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]] -; CHECK: vmovaps [[AVXREG]], (%rsp) -; CHECK: leaq {{[0-9]+}}(%rsp), %rdi -; CHECK: callq _t5_helper1 -; CHECK: vmovaps (%rsp), %ymm0 -; CHECK: callq _t5_helper2 -; CHECK: movl {{[0-9]+}}(%rsp), %eax -; -; CHECK: movq %rbp, %rsp -; CHECK: popq %rbp -} - -declare void @t5_helper1(i32*) - -declare void @t5_helper2(<8 x float>) - -; VLAs + Dynamic realignment + Spill -; FIXME: RA has already reserved RBX, so we can't do dynamic realignment. 
-define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp { -entry: -; CHECK: _t6 - %a = alloca i32, align 4 - %0 = bitcast float* %f to <8 x float>* - %1 = load <8 x float>* %0, align 32 - %vla = alloca i32, i64 %sz, align 16 - call void @t6_helper1(i32* %a, i32* %vla) nounwind - call void @t6_helper2(<8 x float> %1) nounwind - %2 = load i32* %a, align 4 - %add = add nsw i32 %2, 13 - ret i32 %add -} - -declare void @t6_helper1(i32*, i32*) - -declare void @t6_helper2(<8 x float>) diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll index 0f16a64ccd..7ab10a5886 100644 --- a/test/CodeGen/X86/epilogue.ll +++ b/test/CodeGen/X86/epilogue.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea -; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl %ebp} +; RUN: llc < %s -mcpu=generic -march=x86 | grep "movl %ebp" declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll index 14778f097e..9e1a3754d0 100644 --- a/test/CodeGen/X86/extractps.ll +++ b/test/CodeGen/X86/extractps.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mcpu=penryn > %t ; RUN: not grep movd %t -; RUN: grep {movss %xmm} %t | count 1 -; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1 +; RUN: grep "movss %xmm" %t | count 1 +; RUN: grep "extractps \$1, %xmm0, " %t | count 1 ; PR2647 external global float, align 16 ; <float*>:0 [#uses=2] diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll index e4982f0549..14cb136f89 100644 --- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll +++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {add ESP, 8} +; RUN: grep "add ESP, 8" target triple = "i686-pc-linux-gnu" diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll index 323c8533ce..b3adb802a8 100644 --- 
a/test/CodeGen/X86/fast-isel-constpool.ll +++ b/test/CodeGen/X86/fast-isel-constpool.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -fast-isel | grep {LCPI0_0(%rip)} +; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)" ; Make sure fast isel uses rip-relative addressing when required. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin9.0" diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll index 34f8b38252..cb2464e746 100644 --- a/test/CodeGen/X86/fast-isel-gv.ll +++ b/test/CodeGen/X86/fast-isel-gv.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -fast-isel | grep {_kill@GOTPCREL(%rip)} +; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-apple-darwin10.0" @f = global i8 (...)* @kill ; <i8 (...)**> [#uses=1] diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll index 52b3e57b96..f1204d677a 100644 --- a/test/CodeGen/X86/fastcc-byval.ll +++ b/test/CodeGen/X86/fastcc-byval.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -tailcallopt=false | grep {movl\[\[:space:\]\]*8(%esp), %eax} | count 2 +; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2 ; PR3122 ; rdar://6400815 diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll new file mode 100644 index 0000000000..48f963f58e --- /dev/null +++ b/test/CodeGen/X86/force-align-stack-alloca.ll @@ -0,0 +1,63 @@ +; This test is attempting to detect when we request forced re-alignment of the +; stack to an alignment greater than would be available due to the ABI. We +; arbitrarily force alignment up to 32-bytes for i386 hoping that this will +; exceed any ABI provisions. 
+; +; RUN: llc < %s -force-align-stack -stack-alignment=32 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +define i32 @f(i8* %p) nounwind { +entry: + %0 = load i8* %p + %conv = sext i8 %0 to i32 + ret i32 %conv +} + +define i64 @g(i32 %i) nounwind { +; CHECK: g: +; CHECK: pushl +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: pushl +; CHECK-NEXT: subl $20, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; +; The next adjustment of the stack is due to the alloca. +; CHECK: movl %{{...}}, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; +; Next we set up the memset call, and then undo it. +; CHECK: subl $32, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; CHECK: calll memset +; CHECK-NEXT: addl $32, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; +; Next we set up the call to 'f'. +; CHECK: subl $32, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; CHECK: calll f +; CHECK-NEXT: addl $32, %esp +; CHECK-NOT: {{[^ ,]*}}, %esp +; +; Finally we nede to restore %esp from %ebp, the alloca prevents us from +; restoring it directly. +; CHECK-NOT: popl +; CHECK: leal -4(%ebp), %esp +; CHECK-NEXT: popl +; CHECK-NEXT: popl +; CHECK-NEXT: ret + +entry: + br label %if.then + +if.then: + %0 = alloca i8, i32 %i + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 %i, i32 1, i1 false) + %call = call i32 @f(i8* %0) + %conv = sext i32 %call to i64 + ret i64 %conv +} + +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll index cafc61a41f..62d81003a6 100644 --- a/test/CodeGen/X86/fp-immediate-shorten.ll +++ b/test/CodeGen/X86/fp-immediate-shorten.ll @@ -1,7 +1,7 @@ ;; Test that this FP immediate is stored in the constant pool as a float. 
; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \ -; RUN: grep {.long.1123418112} +; RUN: grep ".long.1123418112" define double @D() { ret double 1.230000e+02 diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll index 0145069b8c..a2cea5e57f 100644 --- a/test/CodeGen/X86/fp_load_fold.ll +++ b/test/CodeGen/X86/fp_load_fold.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep -i ST | not grep {fadd\\|fsub\\|fdiv\\|fmul} +; RUN: grep -i ST | not grep "fadd\|fsub\|fdiv\|fmul" ; Test that the load of the memory location is folded into the operation. diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index ff9b1b0b6a..655ab29127 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 >%t -; RUN: grep {addl \\\$4,} %t | count 3 -; RUN: not grep {,%} %t +; RUN: grep "addl \$4," %t | count 3 +; RUN: not grep ",%" %t define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { entry: diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll index 76ffd66524..968a9e88c0 100644 --- a/test/CodeGen/X86/h-register-addressing-32.ll +++ b/test/CodeGen/X86/h-register-addressing-32.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {movzbl %\[abcd\]h,} | count 7 +; RUN: llc < %s -march=x86 | grep "movzbl %[abcd]h," | count 7 ; Use h-register extract and zero-extend. diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll index 98817f3fb5..a19fca5558 100644 --- a/test/CodeGen/X86/h-register-addressing-64.ll +++ b/test/CodeGen/X86/h-register-addressing-64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7 +; RUN: llc < %s -march=x86-64 | grep "movzbl %[abcd]h," | count 7 ; Use h-register extract and zero-extend. 
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll index 402cdfe413..903c4538ab 100644 --- a/test/CodeGen/X86/h-registers-1.ll +++ b/test/CodeGen/X86/h-registers-1.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-linux > %t -; RUN: grep {movzbl %\[abcd\]h,} %t | count 8 -; RUN: grep {%\[abcd\]h} %t | not grep {%r\[\[:digit:\]\]*d} +; RUN: grep "movzbl %[abcd]h," %t | count 8 +; RUN: grep "%[abcd]h" %t | not grep "%r[[:digit:]]*d" ; LLVM creates virtual registers for values live across blocks ; based on the type of the value. Make sure that the extracts diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll index 4289fa7cc2..74ecd045b3 100644 --- a/test/CodeGen/X86/hoist-invariant-load.ll +++ b/test/CodeGen/X86/hoist-invariant-load.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -stats -O2 |& grep "1 machine-licm" +; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7.2" diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll index b8a129d963..62a21f4c5a 100644 --- a/test/CodeGen/X86/illegal-vector-args-return.ll +++ b/test/CodeGen/X86/illegal-vector-args-return.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep {mulpd %xmm3, %xmm1} -; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep {mulpd %xmm2, %xmm0} -; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep {addps %xmm3, %xmm1} -; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep {addps %xmm2, %xmm0} +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm3, %xmm1" +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "mulpd %xmm2, %xmm0" +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, 
%xmm1" +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0" define <4 x double> @foo(<4 x double> %x, <4 x double> %z) { %y = fmul <4 x double> %x, %z diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll index 5e76b6c058..b069c46318 100644 --- a/test/CodeGen/X86/inline-asm-modifier-n.ll +++ b/test/CodeGen/X86/inline-asm-modifier-n.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep { 37} +; RUN: llc < %s -march=x86 | grep " 37" ; rdar://7008959 define void @bork() nounwind { diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll index 5ed0e00fd8..b162666362 100644 --- a/test/CodeGen/X86/isel-sink2.ll +++ b/test/CodeGen/X86/isel-sink2.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 > %t -; RUN: grep {movb.7(%...)} %t +; RUN: grep "movb.7(%...)" %t ; RUN: not grep leal %t define i8 @test(i32 *%P) nounwind { diff --git a/test/CodeGen/X86/ispositive.ll b/test/CodeGen/X86/ispositive.ll index 8adf723aab..b1d1a20c8e 100644 --- a/test/CodeGen/X86/ispositive.ll +++ b/test/CodeGen/X86/ispositive.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep {shrl.*31} +; RUN: llc < %s -march=x86 | grep "shrl.*31" define i32 @test1(i32 %X) { entry: diff --git a/test/CodeGen/X86/label-redefinition.ll b/test/CodeGen/X86/label-redefinition.ll index 9ad33e0297..9e88a18e87 100644 --- a/test/CodeGen/X86/label-redefinition.ll +++ b/test/CodeGen/X86/label-redefinition.ll @@ -1,5 +1,5 @@ ; PR7054 -; RUN: not llc %s -o - |& grep {'_foo' label emitted multiple times to assembly} +; RUN: not llc %s -o - 2>&1 | grep "'_foo' label emitted multiple times to assembly" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0.0" diff --git a/test/CodeGen/X86/large-global.ll b/test/CodeGen/X86/large-global.ll new file mode 100644 index 0000000000..7cb974b21e --- /dev/null +++ 
b/test/CodeGen/X86/large-global.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s +; rdar://11729134 + +; EmitZerofill was incorrectly expecting a 32-bit "size" so 26214400000 +; was printed as 444596224 + +%struct.X = type { [25000 x i8] } + +@gArray = global [1048576 x %struct.X] zeroinitializer, align 16 + +; CHECK: .zerofill __DATA,__common,_gArray,26214400000,4 diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll index 69303507d6..43f69b0c6e 100644 --- a/test/CodeGen/X86/lea-2.ll +++ b/test/CodeGen/X86/lea-2.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {lea EAX, DWORD PTR \\\[... + 4\\*... - 5\\\]} +; RUN: grep "lea EAX, DWORD PTR \[... + 4\*... - 5\]" ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ ; RUN: not grep add diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll index 7491190b01..9f7d036cf1 100644 --- a/test/CodeGen/X86/multiple-loop-post-inc.ll +++ b/test/CodeGen/X86/multiple-loop-post-inc.ll @@ -1,9 +1,9 @@ ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s ; rdar://7236213 - -; Xfailed now that scheduler 2-address hack is disabled a lea is generated. -; The code isn't any worse though. -; XFAIL: * +; +; The scheduler's 2-address hack has been disabled, so there is +; currently no good guarantee that this test will pass until the +; machine scheduler develops an equivalent heuristic. ; CodeGen shouldn't require any lea instructions inside the marked loop. ; It should properly set up post-increment uses and do coalescing for diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll index d185af16b9..e987495f2c 100644 --- a/test/CodeGen/X86/overlap-shift.ll +++ b/test/CodeGen/X86/overlap-shift.ll @@ -7,7 +7,7 @@ ; Check that the shift gets turned into an LEA. 
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep {mov E.X, E.X} +; RUN: not grep "mov E.X, E.X" @G = external global i32 ; <i32*> [#uses=1] diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll index d48a331826..f958b6b2c0 100644 --- a/test/CodeGen/X86/peep-vector-extract-insert.ll +++ b/test/CodeGen/X86/peep-vector-extract-insert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep {xorps %xmm0, %xmm0} | count 2 +; RUN: llc < %s -march=x86-64 | grep "xorps %xmm0, %xmm0" | count 2 define float @foo(<4 x float> %a) { %b = insertelement <4 x float> %a, float 0.0, i32 3 diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index ef02af2d78..476bb10998 100644 --- a/test/CodeGen/X86/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6 +; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 ; PR1296 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll index afd71143c4..f0e31f7f5f 100644 --- a/test/CodeGen/X86/pr2656.ll +++ b/test/CodeGen/X86/pr2656.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {xorps.\*sp} | count 1 +; RUN: llc < %s -march=x86 -mattr=+sse2 | grep "xorps.*sp" | count 1 ; PR2656 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll index 112253038b..d8f37781fc 100644 --- a/test/CodeGen/X86/pr3522.ll +++ b/test/CodeGen/X86/pr3522.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk} +; RUN: llc < %s -march=x86 -stats 2>&1 | not grep 
"instructions sunk" ; PR3522 target triple = "i386-pc-linux-gnu" diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll index e0b5f7a870..52d7b56f18 100644 --- a/test/CodeGen/X86/regpressure.ll +++ b/test/CodeGen/X86/regpressure.ll @@ -1,8 +1,8 @@ ;; Both functions in this testcase should codegen to the same function, and ;; neither of them should require spilling anything to the stack. -; RUN: llc < %s -march=x86 -stats |& \ -; RUN: not grep {Number of register spills} +; RUN: llc < %s -march=x86 -stats 2>&1 | \ +; RUN: not grep "Number of register spills" ;; This can be compiled to use three registers if the loads are not ;; folded into the multiplies, 2 registers otherwise. diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll index 75f438d26c..f6095a7556 100644 --- a/test/CodeGen/X86/remat-scalar-zero.ll +++ b/test/CodeGen/X86/remat-scalar-zero.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t ; RUN: not grep xor %t ; RUN: not grep movap %t -; RUN: grep {\\.quad.*0} %t +; RUN: grep "\.quad.*0" %t ; Remat should be able to fold the zero constant into the div instructions ; as a constant-pool load. 
diff --git a/test/CodeGen/X86/rotate.ll b/test/CodeGen/X86/rotate.ll index 1e20273194..117300110b 100644 --- a/test/CodeGen/X86/rotate.ll +++ b/test/CodeGen/X86/rotate.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {ro\[rl\]} | count 12 +; RUN: grep "ro[rl]" | count 12 define i32 @rotl32(i32 %A, i8 %Amt) { %shift.upgrd.1 = zext i8 %Amt to i32 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll index d38f9a88fc..4f27e97fb3 100644 --- a/test/CodeGen/X86/shift-coalesce.ll +++ b/test/CodeGen/X86/shift-coalesce.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {shld.*CL} +; RUN: grep "shld.*CL" ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: not grep {mov CL, BL} +; RUN: not grep "mov CL, BL" ; PR687 diff --git a/test/CodeGen/X86/shift-double.ll b/test/CodeGen/X86/shift-double.ll index 5adee7c769..8d2b2907c5 100644 --- a/test/CodeGen/X86/shift-double.ll +++ b/test/CodeGen/X86/shift-double.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {sh\[lr\]d} | count 5 +; RUN: grep "sh[lr]d" | count 5 define i64 @test1(i64 %X, i8 %C) { %shift.upgrd.1 = zext i8 %C to i64 ; <i64> [#uses=1] diff --git a/test/CodeGen/X86/shl_elim.ll b/test/CodeGen/X86/shl_elim.ll index 0827221875..83e1eb5c39 100644 --- a/test/CodeGen/X86/shl_elim.ll +++ b/test/CodeGen/X86/shl_elim.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 | grep {movl 8(.esp), %eax} -; RUN: llc < %s -march=x86 | grep {shrl .eax} -; RUN: llc < %s -march=x86 | grep {movswl .ax, .eax} +; RUN: llc < %s -march=x86 | grep "movl 8(.esp), %eax" +; RUN: llc < %s -march=x86 | grep "shrl .eax" +; RUN: llc < %s -march=x86 | grep "movswl .ax, .eax" define i32 @test1(i64 %a) nounwind { %tmp29 = lshr i64 %a, 24 ; <i64> [#uses=1] diff --git a/test/CodeGen/X86/sse_reload_fold.ll b/test/CodeGen/X86/sse_reload_fold.ll index a57fa588f0..fd8db3be10 100644 --- 
a/test/CodeGen/X86/sse_reload_fold.ll +++ b/test/CodeGen/X86/sse_reload_fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic |& FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+64bit,+sse3 -print-failed-fuse-candidates -regalloc=basic 2>&1 | FileCheck %s ; CHECK: fail ; CHECK-NOT: fail diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector-linux.ll index fe2a9c5d57..c07511443b 100644 --- a/test/CodeGen/X86/stack-protector-linux.ll +++ b/test/CodeGen/X86/stack-protector-linux.ll @@ -1,8 +1,8 @@ ; RUN: llc -mtriple=i386-pc-linux-gnu < %s -o - | grep %gs: ; RUN: llc -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %fs: ; RUN: llc -code-model=kernel -mtriple=x86_64-pc-linux-gnu < %s -o - | grep %gs: -; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_guard} -; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep {__stack_chk_fail} +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_guard" +; RUN: llc -mtriple=x86_64-apple-darwin < %s -o - | grep "__stack_chk_fail" @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll index a297728aee..4f31ab5a92 100644 --- a/test/CodeGen/X86/subreg-to-reg-1.ll +++ b/test/CodeGen/X86/subreg-to-reg-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep {leal .*), %e.\*} | count 1 +; RUN: llc < %s -march=x86-64 | grep "leal .*), %e.*" | count 1 ; Don't eliminate or coalesce away the explicit zero-extension! 
; This is currently using an leal because of a 3-addressification detail, diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll index 0ea5541c89..0693789fe5 100644 --- a/test/CodeGen/X86/subreg-to-reg-4.ll +++ b/test/CodeGen/X86/subreg-to-reg-4.ll @@ -5,7 +5,7 @@ ; RUN: not grep negq %t ; RUN: not grep addq %t ; RUN: not grep subq %t -; RUN: not grep {movl %} %t +; RUN: not grep "movl %" %t ; Utilize implicit zero-extension on x86-64 to eliminate explicit ; zero-extensions. Shrink 64-bit adds to 32-bit when the high diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll index 03d6f9411e..118eee6ba6 100644 --- a/test/CodeGen/X86/tailcallbyval.ll +++ b/test/CodeGen/X86/tailcallbyval.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -tailcallopt | grep TAILCALL -; RUN: llc < %s -march=x86 -tailcallopt | grep {movl\[\[:space:\]\]*4(%esp), %eax} | count 1 +; RUN: llc < %s -march=x86 -tailcallopt | grep "movl[[:space:]]*4(%esp), %eax" | count 1 %struct.s = type {i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll new file mode 100644 index 0000000000..7c527e210a --- /dev/null +++ b/test/CodeGen/X86/tls-models.ll @@ -0,0 +1,166 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64_PIC %s +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32_PIC %s + +; Darwin always uses the same model. 
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN %s + +@external_gd = external thread_local global i32 +@internal_gd = internal thread_local global i32 42 + +@external_ld = external thread_local(localdynamic) global i32 +@internal_ld = internal thread_local(localdynamic) global i32 42 + +@external_ie = external thread_local(initialexec) global i32 +@internal_ie = internal thread_local(initialexec) global i32 42 + +@external_le = external thread_local(localexec) global i32 +@internal_le = internal thread_local(localexec) global i32 42 + +; ----- no model specified ----- + +define i32* @f1() { +entry: + ret i32* @external_gd + + ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. + ; X64: f1: + ; X64: external_gd@GOTTPOFF + ; X32: f1: + ; X32: external_gd@INDNTPOFF + ; X64_PIC: f1: + ; X64_PIC: external_gd@TLSGD + ; X32_PIC: f1: + ; X32_PIC: external_gd@TLSGD + ; DARWIN: f1: + ; DARWIN: _external_gd@TLVP +} + +define i32* @f2() { +entry: + ret i32* @internal_gd + + ; Non-PIC code can use local exec, PIC code can use local dynamic. + ; X64: f2: + ; X64: internal_gd@TPOFF + ; X32: f2: + ; X32: internal_gd@NTPOFF + ; X64_PIC: f2: + ; X64_PIC: internal_gd@TLSLD + ; X32_PIC: f2: + ; X32_PIC: internal_gd@TLSLDM + ; DARWIN: f2: + ; DARWIN: _internal_gd@TLVP +} + + +; ----- localdynamic specified ----- + +define i32* @f3() { +entry: + ret i32* @external_ld + + ; Non-PIC code can use initial exec, PIC code use local dynamic as specified. + ; X64: f3: + ; X64: external_ld@GOTTPOFF + ; X32: f3: + ; X32: external_ld@INDNTPOFF + ; X64_PIC: f3: + ; X64_PIC: external_ld@TLSLD + ; X32_PIC: f3: + ; X32_PIC: external_ld@TLSLDM + ; DARWIN: f3: + ; DARWIN: _external_ld@TLVP +} + +define i32* @f4() { +entry: + ret i32* @internal_ld + + ; Non-PIC code can use local exec, PIC code can use local dynamic. 
+ ; X64: f4: + ; X64: internal_ld@TPOFF + ; X32: f4: + ; X32: internal_ld@NTPOFF + ; X64_PIC: f4: + ; X64_PIC: internal_ld@TLSLD + ; X32_PIC: f4: + ; X32_PIC: internal_ld@TLSLDM + ; DARWIN: f4: + ; DARWIN: _internal_ld@TLVP +} + + +; ----- initialexec specified ----- + +define i32* @f5() { +entry: + ret i32* @external_ie + + ; Non-PIC and PIC code will use initial exec as specified. + ; X64: f5: + ; X64: external_ie@GOTTPOFF + ; X32: f5: + ; X32: external_ie@INDNTPOFF + ; X64_PIC: f5: + ; X64_PIC: external_ie@GOTTPOFF + ; X32_PIC: f5: + ; X32_PIC: external_ie@GOTNTPOFF + ; DARWIN: f5: + ; DARWIN: _external_ie@TLVP +} + +define i32* @f6() { +entry: + ret i32* @internal_ie + + ; Non-PIC code can use local exec, PIC code use initial exec as specified. + ; X64: f6: + ; X64: internal_ie@TPOFF + ; X32: f6: + ; X32: internal_ie@NTPOFF + ; X64_PIC: f6: + ; X64_PIC: internal_ie@GOTTPOFF + ; X32_PIC: f6: + ; X32_PIC: internal_ie@GOTNTPOFF + ; DARWIN: f6: + ; DARWIN: _internal_ie@TLVP +} + + +; ----- localexec specified ----- + +define i32* @f7() { +entry: + ret i32* @external_le + + ; Non-PIC and PIC code will use local exec as specified. + ; X64: f7: + ; X64: external_le@TPOFF + ; X32: f7: + ; X32: external_le@NTPOFF + ; X64_PIC: f7: + ; X64_PIC: external_le@TPOFF + ; X32_PIC: f7: + ; X32_PIC: external_le@NTPOFF + ; DARWIN: f7: + ; DARWIN: _external_le@TLVP +} + +define i32* @f8() { +entry: + ret i32* @internal_le + + ; Non-PIC and PIC code will use local exec as specified. 
+ ; X64: f8: + ; X64: internal_le@TPOFF + ; X32: f8: + ; X32: internal_le@NTPOFF + ; X64_PIC: f8: + ; X64_PIC: internal_le@TPOFF + ; X32_PIC: f8: + ; X32_PIC: internal_le@NTPOFF + ; DARWIN: f8: + ; DARWIN: _internal_le@TLVP +} diff --git a/test/CodeGen/X86/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll index 51ddc491aa..af6d47af7a 100644 --- a/test/CodeGen/X86/twoaddr-coalesce-2.ll +++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats |& \ -; RUN: grep {twoaddrinstr} | grep {Number of instructions aggressively commuted} +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \ +; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted" ; rdar://6480363 target triple = "i386-apple-darwin9.6" diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll index 077fee0773..513c304e3b 100644 --- a/test/CodeGen/X86/twoaddr-pass-sink.ll +++ b/test/CodeGen/X86/twoaddr-pass-sink.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& grep {Number of 3-address instructions sunk} +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk" define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { entry: diff --git a/test/CodeGen/X86/uint_to_fp.ll b/test/CodeGen/X86/uint_to_fp.ll index 41ee1947ed..0536eb0522 100644 --- a/test/CodeGen/X86/uint_to_fp.ll +++ b/test/CodeGen/X86/uint_to_fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | not grep {sub.*esp} +; RUN: llc < %s -march=x86 -mcpu=yonah | not grep "sub.*esp" ; RUN: llc < %s -march=x86 -mcpu=yonah | grep cvtsi2ss ; rdar://6034396 diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll index 7416051693..56fdadbf93 100644 --- a/test/CodeGen/X86/umul-with-carry.ll +++ b/test/CodeGen/X86/umul-with-carry.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | 
grep {jc} | count 1 +; RUN: llc < %s -march=x86 | grep "jc" | count 1 ; XFAIL: * ; FIXME: umul-with-overflow not supported yet. diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll new file mode 100644 index 0000000000..a438723d9b --- /dev/null +++ b/test/CodeGen/X86/unwindraise.ll @@ -0,0 +1,252 @@ +; RUN: llc < %s -verify-machineinstrs +; PR13188 +; +; The _Unwind_RaiseException function can return normally and via eh.return. +; This causes confusion about the function live-out registers, since the two +; different ways of returning have different return values. +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-freebsd9.0" + +%struct._Unwind_Context = type { [18 x i8*], i8*, i8*, i8*, %struct.dwarf_eh_bases, i64, i64, i64, [18 x i8] } +%struct.dwarf_eh_bases = type { i8*, i8*, i8* } +%struct._Unwind_FrameState = type { %struct.frame_state_reg_info, i64, i64, i8*, i32, i8*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i64, i64, i64, i8, i8, i8, i8, i8* } +%struct.frame_state_reg_info = type { [18 x %struct.anon], %struct.frame_state_reg_info* } +%struct.anon = type { %union.anon, i32 } +%union.anon = type { i64 } +%struct._Unwind_Exception = type { i64, void (i32, %struct._Unwind_Exception*)*, i64, i64 } + +@dwarf_reg_size_table = external hidden unnamed_addr global [18 x i8], align 16 + +declare void @abort() noreturn + +declare fastcc i32 @uw_frame_state_for(%struct._Unwind_Context*, %struct._Unwind_FrameState*) uwtable + +define hidden i32 @_Unwind_RaiseException(%struct._Unwind_Exception* %exc) uwtable { +entry: + %fs.i = alloca %struct._Unwind_FrameState, align 8 + %this_context = alloca %struct._Unwind_Context, align 8 + %cur_context = alloca %struct._Unwind_Context, align 8 + %fs = alloca %struct._Unwind_FrameState, align 8 + call void 
@llvm.eh.unwind.init() + %0 = call i8* @llvm.eh.dwarf.cfa(i32 0) + %1 = call i8* @llvm.returnaddress(i32 0) + call fastcc void @uw_init_context_1(%struct._Unwind_Context* %this_context, i8* %0, i8* %1) + %2 = bitcast %struct._Unwind_Context* %cur_context to i8* + %3 = bitcast %struct._Unwind_Context* %this_context to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false) + %personality = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 6 + %retaddr_column.i = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 9 + %flags.i.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 5 + %ra.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 2 + %exception_class = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 0 + br label %while.body + +while.body: ; preds = %uw_update_context.exit, %entry + %call = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs) + switch i32 %call, label %do.end21 [ + i32 5, label %do.end21.loopexit46 + i32 0, label %if.end3 + ] + +if.end3: ; preds = %while.body + %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8, !tbaa !0 + %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null + br i1 %tobool, label %if.end13, label %if.then4 + +if.then4: ; preds = %if.end3 + %5 = load i64* %exception_class, align 8, !tbaa !3 + %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context) + switch i32 %call6, label %do.end21.loopexit46 [ + i32 6, label %while.end + i32 8, label %if.end13 + ] + +if.end13: ; preds = %if.then4, %if.end3 + call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs) + %6 = load i64* %retaddr_column.i, align 8, !tbaa !3 + %conv.i = trunc i64 %6 
to i32 + %cmp.i.i.i = icmp slt i32 %conv.i, 18 + br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i + +cond.true.i.i.i: ; preds = %if.end13 + call void @abort() noreturn + unreachable + +cond.end.i.i.i: ; preds = %if.end13 + %sext.i = shl i64 %6, 32 + %idxprom.i.i.i = ashr exact i64 %sext.i, 32 + %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i + %7 = load i8* %arrayidx.i.i.i, align 1, !tbaa !1 + %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i + %8 = load i8** %arrayidx2.i.i.i, align 8, !tbaa !0 + %9 = load i64* %flags.i.i.i.i, align 8, !tbaa !3 + %and.i.i.i.i = and i64 %9, 4611686018427387904 + %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0 + br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i + +land.lhs.true.i.i.i: ; preds = %cond.end.i.i.i + %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i + %10 = load i8* %arrayidx4.i.i.i, align 1, !tbaa !1 + %tobool6.i.i.i = icmp eq i8 %10, 0 + br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i + +if.then.i.i.i: ; preds = %land.lhs.true.i.i.i + %11 = ptrtoint i8* %8 to i64 + br label %uw_update_context.exit + +if.end.i.i.i: ; preds = %land.lhs.true.i.i.i, %cond.end.i.i.i + %cmp8.i.i.i = icmp eq i8 %7, 8 + br i1 %cmp8.i.i.i, label %if.then10.i.i.i, label %cond.true14.i.i.i + +if.then10.i.i.i: ; preds = %if.end.i.i.i + %12 = bitcast i8* %8 to i64* + %13 = load i64* %12, align 8, !tbaa !3 + br label %uw_update_context.exit + +cond.true14.i.i.i: ; preds = %if.end.i.i.i + call void @abort() noreturn + unreachable + +uw_update_context.exit: ; preds = %if.then10.i.i.i, %if.then.i.i.i + %retval.0.i.i.i = phi i64 [ %11, %if.then.i.i.i ], [ %13, %if.then10.i.i.i ] + %14 = inttoptr i64 %retval.0.i.i.i to i8* + store i8* %14, i8** %ra.i, align 8, !tbaa !0 + br label %while.body + +while.end: ; preds = %if.then4 + 
%private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2 + store i64 0, i64* %private_1, align 8, !tbaa !3 + %15 = load i8** %ra.i, align 8, !tbaa !0 + %16 = ptrtoint i8* %15 to i64 + %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3 + store i64 %16, i64* %private_2, align 8, !tbaa !3 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false) + %17 = bitcast %struct._Unwind_FrameState* %fs.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %17) + %personality.i = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 6 + %retaddr_column.i22 = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 9 + br label %while.body.i + +while.body.i: ; preds = %uw_update_context.exit44, %while.end + %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i) + %18 = load i8** %ra.i, align 8, !tbaa !0 + %19 = ptrtoint i8* %18 to i64 + %20 = load i64* %private_2, align 8, !tbaa !3 + %cmp.i = icmp eq i64 %19, %20 + %cmp2.i = icmp eq i32 %call.i, 0 + br i1 %cmp2.i, label %if.end.i, label %do.end21 + +if.end.i: ; preds = %while.body.i + %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8, !tbaa !0 + %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null + br i1 %tobool.i, label %if.end12.i, label %if.then3.i + +if.then3.i: ; preds = %if.end.i + %or.i = select i1 %cmp.i, i32 6, i32 2 + %22 = load i64* %exception_class, align 8, !tbaa !3 + %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context) + switch i32 %call5.i, label %do.end21 [ + i32 7, label %do.body19 + i32 8, label %if.end12.i + ] + +if.end12.i: ; preds = %if.then3.i, %if.end.i + br i1 %cmp.i, label %cond.true.i, label %cond.end.i + +cond.true.i: ; preds = %if.end12.i + call void 
@abort() noreturn + unreachable + +cond.end.i: ; preds = %if.end12.i + call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i) + %23 = load i64* %retaddr_column.i22, align 8, !tbaa !3 + %conv.i23 = trunc i64 %23 to i32 + %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18 + br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25 + +cond.true.i.i.i25: ; preds = %cond.end.i + call void @abort() noreturn + unreachable + +cond.end.i.i.i33: ; preds = %cond.end.i + %sext.i26 = shl i64 %23, 32 + %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32 + %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27 + %24 = load i8* %arrayidx.i.i.i28, align 1, !tbaa !1 + %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27 + %25 = load i8** %arrayidx2.i.i.i29, align 8, !tbaa !0 + %26 = load i64* %flags.i.i.i.i, align 8, !tbaa !3 + %and.i.i.i.i31 = and i64 %26, 4611686018427387904 + %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0 + br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36 + +land.lhs.true.i.i.i36: ; preds = %cond.end.i.i.i33 + %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27 + %27 = load i8* %arrayidx4.i.i.i34, align 1, !tbaa !1 + %tobool6.i.i.i35 = icmp eq i8 %27, 0 + br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37 + +if.then.i.i.i37: ; preds = %land.lhs.true.i.i.i36 + %28 = ptrtoint i8* %25 to i64 + br label %uw_update_context.exit44 + +if.end.i.i.i39: ; preds = %land.lhs.true.i.i.i36, %cond.end.i.i.i33 + %cmp8.i.i.i38 = icmp eq i8 %24, 8 + br i1 %cmp8.i.i.i38, label %if.then10.i.i.i40, label %cond.true14.i.i.i41 + +if.then10.i.i.i40: ; preds = %if.end.i.i.i39 + %29 = bitcast i8* %25 to i64* + %30 = load i64* %29, align 8, !tbaa !3 + br label %uw_update_context.exit44 + +cond.true14.i.i.i41: ; 
preds = %if.end.i.i.i39 + call void @abort() noreturn + unreachable + +uw_update_context.exit44: ; preds = %if.then10.i.i.i40, %if.then.i.i.i37 + %retval.0.i.i.i42 = phi i64 [ %28, %if.then.i.i.i37 ], [ %30, %if.then10.i.i.i40 ] + %31 = inttoptr i64 %retval.0.i.i.i42 to i8* + store i8* %31, i8** %ra.i, align 8, !tbaa !0 + br label %while.body.i + +do.body19: ; preds = %if.then3.i + call void @llvm.lifetime.end(i64 -1, i8* %17) + %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context) + %32 = load i8** %ra.i, align 8, !tbaa !0 + call void @llvm.eh.return.i64(i64 %call20, i8* %32) + unreachable + +do.end21.loopexit46: ; preds = %if.then4, %while.body + %retval.0.ph = phi i32 [ 3, %if.then4 ], [ 5, %while.body ] + br label %do.end21 + +do.end21: ; preds = %do.end21.loopexit46, %if.then3.i, %while.body.i, %while.body + %retval.0 = phi i32 [ %retval.0.ph, %do.end21.loopexit46 ], [ 3, %while.body ], [ 2, %while.body.i ], [ 2, %if.then3.i ] + ret i32 %retval.0 +} + +declare void @llvm.eh.unwind.init() nounwind + +declare fastcc void @uw_init_context_1(%struct._Unwind_Context*, i8*, i8*) uwtable + +declare i8* @llvm.eh.dwarf.cfa(i32) nounwind + +declare i8* @llvm.returnaddress(i32) nounwind readnone + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +declare fastcc i64 @uw_install_context_1(%struct._Unwind_Context*, %struct._Unwind_Context*) uwtable + +declare void @llvm.eh.return.i64(i64, i8*) nounwind + +declare fastcc void @uw_update_context_1(%struct._Unwind_Context*, %struct._Unwind_FrameState* nocapture) uwtable + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +!0 = metadata !{metadata !"any pointer", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"long", metadata !1} 
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll index f2fc7e7d9d..e0862ca8d1 100644 --- a/test/CodeGen/X86/vec_call.ll +++ b/test/CodeGen/X86/vec_call.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ -; RUN: grep {subl.*60} +; RUN: grep "subl.*60" ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \ -; RUN: grep {movaps.*32} +; RUN: grep "movaps.*32" define void @test() { diff --git a/test/CodeGen/X86/vec_ins_extract-1.ll b/test/CodeGen/X86/vec_ins_extract-1.ll index 29511934af..565be7a6cc 100644 --- a/test/CodeGen/X86/vec_ins_extract-1.ll +++ b/test/CodeGen/X86/vec_ins_extract-1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | grep {(%esp,%eax,4)} | count 4 +; RUN: llc < %s -march=x86 -mcpu=yonah | grep "(%esp,%eax,4)" | count 4 ; Inserts and extracts with variable indices must be lowered ; to memory accesses. diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll index 3656e5f6ca..b8ec0cf080 100644 --- a/test/CodeGen/X86/vec_set-9.ll +++ b/test/CodeGen/X86/vec_set-9.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 | grep movd | count 1 -; RUN: llc < %s -march=x86-64 | grep {movlhps.*%xmm0, %xmm0} +; RUN: llc < %s -march=x86-64 | grep "movlhps.*%xmm0, %xmm0" define <2 x i64> @test3(i64 %A) nounwind { entry: diff --git a/test/CodeGen/X86/x86-64-arg.ll b/test/CodeGen/X86/x86-64-arg.ll index ec8dd8edb6..9a959e839a 100644 --- a/test/CodeGen/X86/x86-64-arg.ll +++ b/test/CodeGen/X86/x86-64-arg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep {movl %edi, %eax} +; RUN: llc < %s | grep "movl %edi, %eax" ; The input value is already sign extended, don't re-extend it. 
; This testcase corresponds to: ; int test(short X) { return (int)X; } diff --git a/test/CodeGen/X86/x86-64-pic-1.ll b/test/CodeGen/X86/x86-64-pic-1.ll index 46f6d335d0..46cd4f81bc 100644 --- a/test/CodeGen/X86/x86-64-pic-1.ll +++ b/test/CodeGen/X86/x86-64-pic-1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {callq f@PLT} %t1 +; RUN: grep "callq f@PLT" %t1 define void @g() { entry: diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll index b6f82e23b7..3ec172b2b6 100644 --- a/test/CodeGen/X86/x86-64-pic-10.ll +++ b/test/CodeGen/X86/x86-64-pic-10.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {callq g@PLT} %t1 +; RUN: grep "callq g@PLT" %t1 @g = alias weak i32 ()* @f diff --git a/test/CodeGen/X86/x86-64-pic-11.ll b/test/CodeGen/X86/x86-64-pic-11.ll index 4db331cee4..fd64beb696 100644 --- a/test/CodeGen/X86/x86-64-pic-11.ll +++ b/test/CodeGen/X86/x86-64-pic-11.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {callq __fixunsxfti@PLT} %t1 +; RUN: grep "callq __fixunsxfti@PLT" %t1 define i128 @f(x86_fp80 %a) nounwind { entry: diff --git a/test/CodeGen/X86/x86-64-pic-2.ll b/test/CodeGen/X86/x86-64-pic-2.ll index 1ce2de7209..f3f7b1dffd 100644 --- a/test/CodeGen/X86/x86-64-pic-2.ll +++ b/test/CodeGen/X86/x86-64-pic-2.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {callq f} %t1 -; RUN: not grep {callq f@PLT} %t1 +; RUN: grep "callq f" %t1 +; RUN: not grep "callq f@PLT" %t1 define void @g() { entry: diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll index aa3c888ed6..ba933788a3 100644 --- a/test/CodeGen/X86/x86-64-pic-3.ll +++ b/test/CodeGen/X86/x86-64-pic-3.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {callq f} %t1 -; RUN: not grep {callq f@PLT} 
%t1 +; RUN: grep "callq f" %t1 +; RUN: not grep "callq f@PLT" %t1 define void @g() { entry: diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll index 90fc1194a3..33b08c4b4b 100644 --- a/test/CodeGen/X86/x86-64-pic-4.ll +++ b/test/CodeGen/X86/x86-64-pic-4.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {movq a@GOTPCREL(%rip),} %t1 +; RUN: grep "movq a@GOTPCREL(%rip)," %t1 @a = global i32 0 diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll index 6369bde694..234bc0d2f4 100644 --- a/test/CodeGen/X86/x86-64-pic-5.ll +++ b/test/CodeGen/X86/x86-64-pic-5.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {movl a(%rip),} %t1 +; RUN: grep "movl a(%rip)," %t1 ; RUN: not grep GOTPCREL %t1 @a = hidden global i32 0 diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll index 6e19ad35bc..ae5b583592 100644 --- a/test/CodeGen/X86/x86-64-pic-6.ll +++ b/test/CodeGen/X86/x86-64-pic-6.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {movl a(%rip),} %t1 +; RUN: grep "movl a(%rip)," %t1 ; RUN: not grep GOTPCREL %t1 @a = internal global i32 0 diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll index 4d98ee6140..de240a38d6 100644 --- a/test/CodeGen/X86/x86-64-pic-7.ll +++ b/test/CodeGen/X86/x86-64-pic-7.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {movq f@GOTPCREL(%rip),} %t1 +; RUN: grep "movq f@GOTPCREL(%rip)," %t1 define void ()* @g() nounwind { entry: diff --git a/test/CodeGen/X86/x86-64-pic-8.ll b/test/CodeGen/X86/x86-64-pic-8.ll index d3b567c610..db35c33623 100644 --- a/test/CodeGen/X86/x86-64-pic-8.ll +++ b/test/CodeGen/X86/x86-64-pic-8.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {leaq f(%rip),} %t1 +; 
RUN: grep "leaq f(%rip)," %t1 ; RUN: not grep GOTPCREL %t1 define void ()* @g() { diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll index 076103133f..6daea84e1a 100644 --- a/test/CodeGen/X86/x86-64-pic-9.ll +++ b/test/CodeGen/X86/x86-64-pic-9.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -; RUN: grep {leaq f(%rip),} %t1 +; RUN: grep "leaq f(%rip)," %t1 ; RUN: not grep GOTPCREL %t1 define void ()* @g() nounwind { diff --git a/test/CodeGen/XCore/mkmsk.ll b/test/CodeGen/XCore/mkmsk.ll new file mode 100644 index 0000000000..377612b7d2 --- /dev/null +++ b/test/CodeGen/XCore/mkmsk.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +define i32 @f(i32) nounwind { +; CHECK: f: +; CHECK: mkmsk r0, r0 +; CHECK-NEXT: retsp 0 +entry: + %1 = shl i32 1, %0 + %2 = add i32 %1, -1 + ret i32 %2 +} diff --git a/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 Binary files differnew file mode 100755 index 0000000000..7cee968072 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-test.elf-x86-64 diff --git a/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 Binary files differnew file mode 100755 index 0000000000..a226e79f3b --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-test2.elf-x86-64 diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll new file mode 100644 index 0000000000..3be9abaffc --- /dev/null +++ b/test/DebugInfo/X86/DW_AT_location-reference.ll @@ -0,0 +1,111 @@ +; RUN: llc -O1 -mtriple=x86_64-apple-darwin < %s | FileCheck -check-prefix=DARWIN %s +; RUN: llc -O1 -mtriple=x86_64-pc-linux-gnu < %s | FileCheck -check-prefix=LINUX %s +; PR9493 +; Adapted from the original test case in r127757. +; We use 'llc -O1' to induce variable 'x' to live in different locations. 
+; We don't actually care where 'x' lives, or what exact optimizations get +; used, as long as 'x' moves around we're fine. + +; // The variable 'x' lives in different locations, so it needs an entry in +; // the .debug_loc table section, referenced by DW_AT_location. +; // This ref is not relocatable on Darwin, and is relocatable elsewhere. +; extern int g(int, int); +; extern int a; +; +; void f(void) { +; int x; +; a = g(0, 0); +; x = 1; +; while (x & 1) { x *= a; } +; a = g(x, 0); +; x = 2; +; while (x & 2) { x *= a; } +; a = g(0, x); +; } + +; // The 'x' variable and its symbol reference location +; DARWIN: DW_TAG_variable +; DARWIN-NEXT: ## DW_AT_name +; DARWIN-NEXT: .long Lset{{[0-9]+}} +; DARWIN-NEXT: ## DW_AT_decl_file +; DARWIN-NEXT: ## DW_AT_decl_line +; DARWIN-NEXT: ## DW_AT_type +; DARWIN-NEXT: Lset{{[0-9]+}} = Ldebug_loc{{[0-9]+}}-Lsection_debug_loc ## DW_AT_location +; DARWIN-NEXT: .long Lset{{[0-9]+}} + +; LINUX: DW_TAG_variable +; LINUX-NEXT: # DW_AT_name +; LINUX-NEXT: # DW_AT_decl_file +; LINUX-NEXT: # DW_AT_decl_line +; LINUX-NEXT: # DW_AT_type +; LINUX-NEXT: .long .Ldebug_loc{{[0-9]+}} # DW_AT_location + + +; ModuleID = 'simple.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" + +@a = external global i32 + +define void @f() nounwind { +entry: + %call = tail call i32 @g(i32 0, i32 0) nounwind, !dbg !8 + store i32 %call, i32* @a, align 4, !dbg !8, !tbaa !9 + tail call void @llvm.dbg.value(metadata !12, i64 0, metadata !5), !dbg !13 + br label %while.body + +while.body: ; preds = %entry, %while.body + %x.017 = phi i32 [ 1, %entry ], [ %mul, %while.body ] + %mul = mul nsw i32 %call, %x.017, !dbg !14 + %and = and i32 %mul, 1, !dbg !14 + %tobool = icmp eq i32 %and, 0, !dbg !14 + br i1 %tobool, label %while.end, label %while.body, !dbg !14 + +while.end: ; preds = %while.body + tail call void @llvm.dbg.value(metadata !{i32 %mul}, i64 0, metadata !5), !dbg !14 + 
%call4 = tail call i32 @g(i32 %mul, i32 0) nounwind, !dbg !15 + store i32 %call4, i32* @a, align 4, !dbg !15, !tbaa !9 + tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !5), !dbg !17 + br label %while.body9 + +while.body9: ; preds = %while.end, %while.body9 + %x.116 = phi i32 [ 2, %while.end ], [ %mul12, %while.body9 ] + %mul12 = mul nsw i32 %call4, %x.116, !dbg !18 + %and7 = and i32 %mul12, 2, !dbg !18 + %tobool8 = icmp eq i32 %and7, 0, !dbg !18 + br i1 %tobool8, label %while.end13, label %while.body9, !dbg !18 + +while.end13: ; preds = %while.body9 + tail call void @llvm.dbg.value(metadata !{i32 %mul12}, i64 0, metadata !5), !dbg !18 + %call15 = tail call i32 @g(i32 0, i32 %mul12) nounwind, !dbg !19 + store i32 %call15, i32* @a, align 4, !dbg !19, !tbaa !9 + ret void, !dbg !20 +} + +declare i32 @g(i32, i32) + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0} +!llvm.dbg.lv.f = !{!5} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !"clang version 3.0 (trunk)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{null} +!5 = metadata !{i32 590080, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!6 = metadata !{i32 589835, metadata !0, i32 4, i32 14, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!7 = metadata !{i32 589860, 
metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 6, i32 3, metadata !6, null} +!9 = metadata !{metadata !"int", metadata !10} +!10 = metadata !{metadata !"omnipotent char", metadata !11} +!11 = metadata !{metadata !"Simple C/C++ TBAA", null} +!12 = metadata !{i32 1} +!13 = metadata !{i32 7, i32 3, metadata !6, null} +!14 = metadata !{i32 8, i32 3, metadata !6, null} +!15 = metadata !{i32 9, i32 3, metadata !6, null} +!16 = metadata !{i32 2} +!17 = metadata !{i32 10, i32 3, metadata !6, null} +!18 = metadata !{i32 11, i32 3, metadata !6, null} +!19 = metadata !{i32 12, i32 3, metadata !6, null} +!20 = metadata !{i32 13, i32 1, metadata !6, null} diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test new file mode 100644 index 0000000000..84fe7f3ec8 --- /dev/null +++ b/test/DebugInfo/dwarfdump-test.test @@ -0,0 +1,25 @@ +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \ +RUN: --address=0x400589 --functions | FileCheck %s -check-prefix MAIN +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \ +RUN: --address=0x400558 --functions | FileCheck %s -check-prefix FUNCTION +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 \ +RUN: --address=0x4005b6 --functions | FileCheck %s -check-prefix CTOR_WITH_SPEC +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \ +RUN: --address=0x4004b8 --functions | FileCheck %s -check-prefix MANY_CU_1 +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test2.elf-x86-64 \ +RUN: --address=0x4004c4 --functions | FileCheck %s -check-prefix MANY_CU_2 + +MAIN: main +MAIN-NEXT: dwarfdump-test.cc:16:10 + +FUNCTION: _Z1fii +FUNCTION-NEXT: dwarfdump-test.cc:11:18 + +CTOR_WITH_SPEC: _ZN10DummyClassC1Ei +CTOR_WITH_SPEC-NEXT: dwarfdump-test.cc:4:30 + +MANY_CU_1: a +MANY_CU_1-NEXT: a.cc:2:0 + +MANY_CU_2: main +MANY_CU_2-NEXT: main.cc:4:0 diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll index 
3193791974..396ae85226 100644 --- a/test/DebugInfo/printdbginfo2.ll +++ b/test/DebugInfo/printdbginfo2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s +; RUN: opt < %s -print-dbginfo -disable-output 2>&1 | FileCheck %s ; grep {%b is variable b of type x declared at x.c:7} %t1 ; grep {%2 is variable b of type x declared at x.c:7} %t1 ; grep {@c.1442 is variable c of type int declared at x.c:4} %t1 diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg index ba81a44946..2980ce7081 100644 --- a/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -8,12 +8,12 @@ def getRoot(config): root = getRoot(config) targets = set(root.targets_to_build.split()) -if ('X86' in targets) | ('ARM' in targets): +if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets): config.unsupported = False else: config.unsupported = True -if root.host_arch not in ['x86', 'x86_64', 'ARM']: +if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips']: config.unsupported = True if root.host_os in ['Win32', 'Cygwin', 'MingW', 'Windows', 'Darwin']: diff --git a/test/Feature/globalredefinition3.ll b/test/Feature/globalredefinition3.ll index 5a5b3f1f89..2551d932d8 100644 --- a/test/Feature/globalredefinition3.ll +++ b/test/Feature/globalredefinition3.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null |& grep {redefinition of global '@B'} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "redefinition of global '@B'" @B = global i32 7 @B = global i32 7 diff --git a/test/Feature/load_module.ll b/test/Feature/load_module.ll index 05f6c23813..14c1153fa5 100644 --- a/test/Feature/load_module.ll +++ b/test/Feature/load_module.ll @@ -1,6 +1,6 @@ ; PR1318 ; RUN: opt < %s -load=%llvmshlibdir/LLVMHello%shlibext -hello \ -; RUN: -disable-output |& grep Hello +; RUN: -disable-output 2>&1 | grep Hello ; REQUIRES: loadable_module ; FIXME: On Cygming, it might fail without building LLVMHello 
manually. diff --git a/test/Feature/packed_struct.ll b/test/Feature/packed_struct.ll index 4d4ace9534..07666491b1 100644 --- a/test/Feature/packed_struct.ll +++ b/test/Feature/packed_struct.ll @@ -2,7 +2,7 @@ ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll ; RUN: diff %t1.ll %t2.ll ; RUN: not grep cast %t2.ll -; RUN: grep {\\}>} %t2.ll +; RUN: grep "}>" %t2.ll ; END. %struct.anon = type <{ i8, i32, i32, i32 }> diff --git a/test/Feature/vector-cast-constant-exprs.ll b/test/Feature/vector-cast-constant-exprs.ll index ffdc0f080f..992987ca04 100644 --- a/test/Feature/vector-cast-constant-exprs.ll +++ b/test/Feature/vector-cast-constant-exprs.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | not grep {ret.*(} +; RUN: llvm-as < %s | llvm-dis | not grep "ret.*(" ; All of these constant expressions should fold. diff --git a/test/Integer/packed_struct_bt.ll b/test/Integer/packed_struct_bt.ll index a4d01e7d84..257c1c66eb 100644 --- a/test/Integer/packed_struct_bt.ll +++ b/test/Integer/packed_struct_bt.ll @@ -2,7 +2,7 @@ ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll ; RUN: diff %t1.ll %t2.ll ; RUN: not grep cast %t2.ll -; RUN: grep {\\}>} %t2.ll +; RUN: grep "}>" %t2.ll ; END. %struct.anon = type <{ i8, i35, i35, i35 }> diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll index cc34634b32..e7431ec158 100644 --- a/test/Linker/2003-01-30-LinkerRename.ll +++ b/test/Linker/2003-01-30-LinkerRename.ll @@ -1,9 +1,9 @@ ; This fails because the linker renames the external symbol not the internal ; one... 
-; RUN: echo {define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc +; RUN: echo "define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc ; RUN: llvm-as %s -o %t.2.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {@foo()} | grep -v internal +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "@foo()" | grep -v internal define i32 @foo() { ret i32 0 } diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll index 043457da37..94fb5e0826 100644 --- a/test/Linker/2003-01-30-LinkerTypeRename.ll +++ b/test/Linker/2003-01-30-LinkerTypeRename.ll @@ -1,9 +1,9 @@ ; This fails because the linker renames the non-opaque type not the opaque ; one... -; RUN: echo {%%Ty = type opaque @GV = external global %%Ty*} | llvm-as > %t.1.bc +; RUN: echo "%%Ty = type opaque @GV = external global %%Ty*" | llvm-as > %t.1.bc ; RUN: llvm-as < %s > %t.2.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {%%Ty } | not grep opaque +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "%%Ty " | not grep opaque %Ty = type {i32} diff --git a/test/Linker/2003-04-21-Linkage.ll b/test/Linker/2003-04-21-Linkage.ll deleted file mode 100644 index f6d4c4b03b..0000000000 --- a/test/Linker/2003-04-21-Linkage.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: echo {@X = linkonce global i32 5 \ -; RUN: define linkonce i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc -; RUN: llvm-as %s -o %t.2.bc -; RUN: llvm-link %t.1.bc %t.2.bc -@X = external global i32 - -declare i32 @foo() - -define void @bar() { - load i32* @X - call i32 @foo() - ret void -} - diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll index beaf6ec521..98a943a534 100644 --- a/test/Linker/2003-04-23-LinkOnceLost.ll +++ b/test/Linker/2003-04-23-LinkOnceLost.ll @@ -1,7 +1,7 @@ ; This fails because the linker renames the non-opaque type not the opaque ; one... 
-; RUN: echo { define linkonce void @foo() \{ ret void \} } | \ +; RUN: echo " define linkonce void @foo() { ret void } " | \ ; RUN: llvm-as -o %t.2.bc ; RUN: llvm-as %s -o %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep foo | grep linkonce diff --git a/test/Linker/2003-04-26-NullPtrLinkProblem.ll b/test/Linker/2003-04-26-NullPtrLinkProblem.ll index d23df1bb59..5e8249b047 100644 --- a/test/Linker/2003-04-26-NullPtrLinkProblem.ll +++ b/test/Linker/2003-04-26-NullPtrLinkProblem.ll @@ -1,7 +1,7 @@ ; This one fails because the LLVM runtime is allowing two null pointers of ; the same type to be created! -; RUN: echo {%%T = type i32} | llvm-as > %t.2.bc +; RUN: echo "%%T = type i32" | llvm-as > %t.2.bc ; RUN: llvm-as %s -o %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/2003-05-15-TypeProblem.ll b/test/Linker/2003-05-15-TypeProblem.ll index 18fcea00a1..c1fe334d15 100644 --- a/test/Linker/2003-05-15-TypeProblem.ll +++ b/test/Linker/2003-05-15-TypeProblem.ll @@ -1,7 +1,7 @@ ; This one fails because the LLVM runtime is allowing two null pointers of ; the same type to be created! -; RUN: echo {%M = type \{ %N*\} %N = type opaque} | llvm-as > %t.2.bc +; RUN: echo "%M = type { %N*} %N = type opaque" | llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll index 80e0a69769..dff861dc4b 100644 --- a/test/Linker/2003-05-31-LinkerRename.ll +++ b/test/Linker/2003-05-31-LinkerRename.ll @@ -4,9 +4,9 @@ ; the function name, we must rename the internal function to something that ; does not conflict. 
-; RUN: echo { define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc +; RUN: echo " define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc ; RUN: llvm-as < %s > %t.2.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep {@foo(} +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep "@foo(" declare i32 @foo() diff --git a/test/Linker/2003-06-02-TypeResolveProblem.ll b/test/Linker/2003-06-02-TypeResolveProblem.ll index 0b0e9c1908..fa24b6dbfb 100644 --- a/test/Linker/2003-06-02-TypeResolveProblem.ll +++ b/test/Linker/2003-06-02-TypeResolveProblem.ll @@ -1,4 +1,4 @@ -; RUN: echo {%%T = type opaque} | llvm-as > %t.2.bc +; RUN: echo "%%T = type opaque" | llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/2003-06-02-TypeResolveProblem2.ll b/test/Linker/2003-06-02-TypeResolveProblem2.ll index 3f9fd04ae3..3ae23a2080 100644 --- a/test/Linker/2003-06-02-TypeResolveProblem2.ll +++ b/test/Linker/2003-06-02-TypeResolveProblem2.ll @@ -1,4 +1,4 @@ -; RUN: echo {%%T = type i32} | llvm-as > %t.1.bc +; RUN: echo "%%T = type i32" | llvm-as > %t.1.bc ; RUN: llvm-as < %s > %t.2.bc ; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/2003-08-20-OpaqueTypeResolve.ll b/test/Linker/2003-08-20-OpaqueTypeResolve.ll index c0fc620cfa..175146f122 100644 --- a/test/Linker/2003-08-20-OpaqueTypeResolve.ll +++ b/test/Linker/2003-08-20-OpaqueTypeResolve.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as < %s > %t.out1.bc -; RUN: echo {%M = type \{ i32, i32* \} } | llvm-as > %t.out2.bc +; RUN: echo "%M = type { i32, i32* } " | llvm-as > %t.out2.bc ; RUN: llvm-link %t.out1.bc %t.out2.bc %M = type { i32, %N* } diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll index 255cb88daf..e934836a61 100644 --- a/test/Linker/2003-08-23-GlobalVarLinking.ll +++ b/test/Linker/2003-08-23-GlobalVarLinking.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as < %s > %t.out1.bc -; RUN: echo {%%T1 = type opaque 
%%T2 = type opaque @S = external global \{ i32, %%T1* \} declare void @F(%%T2*)}\ +; RUN: echo "%%T1 = type opaque %%T2 = type opaque @S = external global { i32, %%T1* } declare void @F(%%T2*)"\ ; RUN: | llvm-as > %t.out2.bc ; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque diff --git a/test/Linker/2003-08-24-InheritPtrSize.ll b/test/Linker/2003-08-24-InheritPtrSize.ll index f93c054dec..51d544b83f 100644 --- a/test/Linker/2003-08-24-InheritPtrSize.ll +++ b/test/Linker/2003-08-24-InheritPtrSize.ll @@ -2,8 +2,8 @@ ; specified pointer size should not cause a warning! ; RUN: llvm-as < %s > %t.out1.bc -; RUN: echo {} | llvm-as > %t.out2.bc -; RUN: llvm-link %t.out1.bc %t.out2.bc |& not grep warning +; RUN: echo "" | llvm-as > %t.out2.bc +; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | not grep warning target datalayout = "e-p:64:64" diff --git a/test/Linker/2004-12-03-DisagreeingType.ll b/test/Linker/2004-12-03-DisagreeingType.ll index 570bda87e2..73d7a40550 100644 --- a/test/Linker/2004-12-03-DisagreeingType.ll +++ b/test/Linker/2004-12-03-DisagreeingType.ll @@ -1,7 +1,7 @@ -; RUN: echo {@G = weak global \{\{\{\{double\}\}\}\} zeroinitializer } | \ +; RUN: echo "@G = weak global {{{{double}}}} zeroinitializer " | \ ; RUN: llvm-as > %t.out2.bc ; RUN: llvm-as < %s > %t.out1.bc -; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep {\\}} +; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep "}" ; When linked, the global above should be eliminated, being merged with the ; global below. diff --git a/test/Linker/2005-02-12-ConstantGlobals-2.ll b/test/Linker/2005-02-12-ConstantGlobals-2.ll index 2ceae3146f..30bfafeb13 100644 --- a/test/Linker/2005-02-12-ConstantGlobals-2.ll +++ b/test/Linker/2005-02-12-ConstantGlobals-2.ll @@ -1,8 +1,8 @@ ; Test that a prototype can be marked const, and the definition is allowed ; to be nonconst. 
-; RUN: echo {@X = external constant i32} | llvm-as > %t.2.bc +; RUN: echo "@X = external constant i32" | llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7} +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7" @X = global i32 7 diff --git a/test/Linker/2005-02-12-ConstantGlobals.ll b/test/Linker/2005-02-12-ConstantGlobals.ll index 60f176b053..93709cf50b 100644 --- a/test/Linker/2005-02-12-ConstantGlobals.ll +++ b/test/Linker/2005-02-12-ConstantGlobals.ll @@ -1,8 +1,8 @@ ; Test that a prototype can be marked const, and the definition is allowed ; to be nonconst. -; RUN: echo {@X = global i32 7} | llvm-as > %t.2.bc +; RUN: echo "@X = global i32 7" | llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {global i32 7} +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7" @X = external constant i32 ; <i32*> [#uses=0] diff --git a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll index 7d1020ddf2..d7a34c841e 100644 --- a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll +++ b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll @@ -1,7 +1,7 @@ -; RUN: echo { @G = appending global \[0 x i32\] zeroinitializer } | \ +; RUN: echo " @G = appending global [0 x i32] zeroinitializer " | \ ; RUN: llvm-as > %t.out2.bc ; RUN: llvm-as < %s > %t.out1.bc -; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep {@G =} +; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep "@G =" ; When linked, the globals should be merged, and the result should still ; be named '@G'. diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll index df3284bede..eec8f637be 100644 --- a/test/Linker/2006-06-15-GlobalVarAlignment.ll +++ b/test/Linker/2006-06-15-GlobalVarAlignment.ll @@ -1,7 +1,7 @@ ; The linker should choose the largest alignment when linking. 
-; RUN: echo {@X = global i32 7, align 8} | llvm-as > %t.2.bc +; RUN: echo "@X = global i32 7, align 8" | llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc -; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {align 8} +; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "align 8" @X = weak global i32 7, align 4 diff --git a/test/Linker/2008-06-13-LinkOnceRedefinition.ll b/test/Linker/2008-06-13-LinkOnceRedefinition.ll index 49da96af94..da4b48dcbb 100644 --- a/test/Linker/2008-06-13-LinkOnceRedefinition.ll +++ b/test/Linker/2008-06-13-LinkOnceRedefinition.ll @@ -2,7 +2,7 @@ ; in different modules. ; RUN: llvm-as %s -o %t.foo1.bc ; RUN: llvm-as %s -o %t.foo2.bc -; RUN: echo {define linkonce void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc +; RUN: echo "define linkonce void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc ; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S ; RUN: llvm-link %t.foo1.bc %t.foo3.bc -S define linkonce void @foo() { ret void } diff --git a/test/Linker/2008-06-26-AddressSpace.ll b/test/Linker/2008-06-26-AddressSpace.ll index e3ed385b68..e1d35741e7 100644 --- a/test/Linker/2008-06-26-AddressSpace.ll +++ b/test/Linker/2008-06-26-AddressSpace.ll @@ -2,8 +2,8 @@ ; in different modules. ; RUN: llvm-as %s -o %t.foo1.bc ; RUN: echo | llvm-as -o %t.foo2.bc -; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep {addrspace(2)} -; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep {addrspace(2)} +; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep "addrspace(2)" +; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep "addrspace(2)" ; rdar://6038021 @G = addrspace(2) global i32 256 diff --git a/test/Linker/AppendingLinkage.ll b/test/Linker/AppendingLinkage.ll index 134a42ef21..014ead91bd 100644 --- a/test/Linker/AppendingLinkage.ll +++ b/test/Linker/AppendingLinkage.ll @@ -1,6 +1,6 @@ ; Test that appending linkage works correctly. 
-; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \ +; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \ ; RUN: llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 4 | grep 8 diff --git a/test/Linker/AppendingLinkage2.ll b/test/Linker/AppendingLinkage2.ll index 2c1302f39b..7385efb1f9 100644 --- a/test/Linker/AppendingLinkage2.ll +++ b/test/Linker/AppendingLinkage2.ll @@ -1,6 +1,6 @@ ; Test that appending linkage works correctly when arrays are the same size. -; RUN: echo {@X = appending global \[1 x i32\] \[i32 8\] } | \ +; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \ ; RUN: llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 8 diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll index 8fdbe508db..716eb3d02e 100644 --- a/test/Linker/ConstantGlobals1.ll +++ b/test/Linker/ConstantGlobals1.ll @@ -1,6 +1,6 @@ ; Test that appending linkage works correctly when arrays are the same size. -; RUN: echo {@X = constant \[1 x i32\] \[i32 8\] } | \ +; RUN: echo "@X = constant [1 x i32] [i32 8] " | \ ; RUN: llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll index ad4428b952..ad0f8e25f2 100644 --- a/test/Linker/ConstantGlobals2.ll +++ b/test/Linker/ConstantGlobals2.ll @@ -1,6 +1,6 @@ ; Test that appending linkage works correctly when arrays are the same size. 
-; RUN: echo {@X = external global \[1 x i32\] } | \ +; RUN: echo "@X = external global [1 x i32] " | \ ; RUN: llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll index e25529ae1b..5aa26bc29b 100644 --- a/test/Linker/ConstantGlobals3.ll +++ b/test/Linker/ConstantGlobals3.ll @@ -1,6 +1,6 @@ ; Test that appending linkage works correctly when arrays are the same size. -; RUN: echo {@X = external constant \[1 x i32\] } | \ +; RUN: echo "@X = external constant [1 x i32] " | \ ; RUN: llvm-as > %t.2.bc ; RUN: llvm-as < %s > %t.1.bc ; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant diff --git a/test/Linker/Inputs/PR11464.a.ll b/test/Linker/Inputs/PR11464.a.ll new file mode 100644 index 0000000000..25a93500de --- /dev/null +++ b/test/Linker/Inputs/PR11464.a.ll @@ -0,0 +1,3 @@ +%bug_type = type opaque +declare i32 @bug_a(%bug_type*) +declare i32 @bug_b(%bug_type*) diff --git a/test/Linker/Inputs/PR11464.b.ll b/test/Linker/Inputs/PR11464.b.ll new file mode 100644 index 0000000000..7ef5a36129 --- /dev/null +++ b/test/Linker/Inputs/PR11464.b.ll @@ -0,0 +1,13 @@ +%bug_type = type { %bug_type* } +%bar = type { i32 } + +define i32 @bug_a(%bug_type* %fp) nounwind uwtable { +entry: + %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0 + ret i32 0 +} + +define i32 @bug_b(%bar* %a) nounwind uwtable { +entry: + ret i32 0 +} diff --git a/test/Linker/Inputs/PR8300.a.ll b/test/Linker/Inputs/PR8300.a.ll new file mode 100644 index 0000000000..c705db3fd0 --- /dev/null +++ b/test/Linker/Inputs/PR8300.a.ll @@ -0,0 +1,2 @@ +%foo2 = type { [8 x i8] } +declare void @zed(%foo2*) diff --git a/test/Linker/Inputs/PR8300.b.ll b/test/Linker/Inputs/PR8300.b.ll new file mode 100644 index 0000000000..9e538f5d28 --- /dev/null +++ b/test/Linker/Inputs/PR8300.b.ll @@ -0,0 +1,9 @@ +%foo = type { [8 x i8] } +%bar = type { [9 x i8] } + +@zed = alias bitcast (void (%bar*)* 
@xyz to void (%foo*)*) + +define void @xyz(%bar* %this) { +entry: + ret void +} diff --git a/test/Linker/Inputs/basiclink.a.ll b/test/Linker/Inputs/basiclink.a.ll new file mode 100644 index 0000000000..997932d4fa --- /dev/null +++ b/test/Linker/Inputs/basiclink.a.ll @@ -0,0 +1,2 @@ +define i32* @foo(i32 %x) { ret i32* @baz } +@baz = external global i32 diff --git a/test/Linker/Inputs/basiclink.b.ll b/test/Linker/Inputs/basiclink.b.ll new file mode 100644 index 0000000000..0d2abc771c --- /dev/null +++ b/test/Linker/Inputs/basiclink.b.ll @@ -0,0 +1,6 @@ +declare i32* @foo(...) +define i32* @bar() { + %ret = call i32* (...)* @foo( i32 123 ) + ret i32* %ret +} +@baz = global i32 0 diff --git a/test/Linker/Inputs/linkage.a.ll b/test/Linker/Inputs/linkage.a.ll new file mode 100644 index 0000000000..8a156f669a --- /dev/null +++ b/test/Linker/Inputs/linkage.a.ll @@ -0,0 +1,2 @@ +@X = linkonce global i32 5 +define linkonce i32 @foo() { ret i32 7 } diff --git a/test/Linker/Inputs/linkage.b.ll b/test/Linker/Inputs/linkage.b.ll new file mode 100644 index 0000000000..0ada3f4cb4 --- /dev/null +++ b/test/Linker/Inputs/linkage.b.ll @@ -0,0 +1,10 @@ +@X = external global i32 + +declare i32 @foo() + +define void @bar() { + load i32* @X + call i32 @foo() + ret void +} + diff --git a/test/Linker/PR8300.ll b/test/Linker/PR8300.ll index f0fc1e7a5c..7c03d5bb4e 100644 --- a/test/Linker/PR8300.ll +++ b/test/Linker/PR8300.ll @@ -1,13 +1 @@ -; RUN: echo {%foo2 = type \{ \[8 x i8\] \} \ -; RUN: declare void @zed(%foo2*) } > %t.ll -; RUN: llvm-link %t.ll %s -o %t.bc - -%foo = type { [8 x i8] } -%bar = type { [9 x i8] } - -@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*) - -define void @xyz(%bar* %this) { -entry: - ret void -} +; RUN: llvm-link %S/Inputs/PR8300.a.ll %S/Inputs/PR8300.b.ll -o %t.bc diff --git a/test/Linker/basiclink.ll b/test/Linker/basiclink.ll index afe0320ba9..804329a49c 100644 --- a/test/Linker/basiclink.ll +++ b/test/Linker/basiclink.ll @@ -1,13 +1,6 @@ ; Test 
linking two functions with different prototypes and two globals ; in different modules. This is for PR411 -; RUN: llvm-as %s -o %t.bar.bc -; RUN: echo {define i32* @foo(i32 %x) \{ ret i32* @baz \} \ -; RUN: @baz = external global i32 } | llvm-as -o %t.foo.bc -; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc +; RUN: llvm-as %S/Inputs/basiclink.a.ll -o %t.foo.bc +; RUN: llvm-as %S/Inputs/basiclink.b.ll -o %t.bar.bc ; RUN: llvm-link %t.foo.bc %t.bar.bc -o %t.bc -declare i32* @foo(...) -define i32* @bar() { - %ret = call i32* (...)* @foo( i32 123 ) - ret i32* %ret -} -@baz = global i32 0 +; RUN: llvm-link %t.bar.bc %t.foo.bc -o %t.bc diff --git a/test/Linker/link-global-to-func.ll b/test/Linker/link-global-to-func.ll index 2fc501dedc..9d969d768d 100644 --- a/test/Linker/link-global-to-func.ll +++ b/test/Linker/link-global-to-func.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as %s -o %t1.bc -; RUN: echo {declare void @__eprintf(i8*, i8*, i32, i8*) noreturn define void @foo() { tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind unreachable }} | llvm-as -o %t2.bc +; RUN: echo "declare void @__eprintf(i8*, i8*, i32, i8*) noreturn define void @foo() { tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind unreachable }" | llvm-as -o %t2.bc ; RUN: llvm-link %t2.bc %t1.bc -S | grep __eprintf ; RUN: llvm-link %t1.bc %t2.bc -S | grep __eprintf diff --git a/test/Linker/link-messages.ll b/test/Linker/link-messages.ll index 1faae186dd..4e7ffbc97d 100644 --- a/test/Linker/link-messages.ll +++ b/test/Linker/link-messages.ll @@ -2,7 +2,7 @@ ; that error is printed out. 
; RUN: llvm-as %s -o %t.one.bc ; RUN: llvm-as %s -o %t.two.bc -; RUN: not llvm-link %t.one.bc %t.two.bc -o %t.bc |& FileCheck %s +; RUN: not llvm-link %t.one.bc %t.two.bc -o %t.bc 2>&1 | FileCheck %s ; CHECK: symbol multiply defined define i32 @bar() { diff --git a/test/Linker/linkage.ll b/test/Linker/linkage.ll new file mode 100644 index 0000000000..c7309aa875 --- /dev/null +++ b/test/Linker/linkage.ll @@ -0,0 +1,3 @@ +; RUN: llvm-as %S/Inputs/linkage.a.ll -o %t.1.bc +; RUN: llvm-as %S/Inputs/linkage.b.ll -o %t.2.bc +; RUN: llvm-link %t.1.bc %t.2.bc diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll index f411a569d8..a656c8b84b 100644 --- a/test/Linker/module-flags-4-a.ll +++ b/test/Linker/module-flags-4-a.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - |& FileCheck %s +; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - 2>&1 | FileCheck %s ; Test 'require' error. diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll index 2e59ecca70..8d625cd8c9 100644 --- a/test/Linker/module-flags-5-a.ll +++ b/test/Linker/module-flags-5-a.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - |& FileCheck %s +; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - 2>&1 | FileCheck %s ; Test the 'override' error. diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll index c3e0225814..5329c436a4 100644 --- a/test/Linker/module-flags-6-a.ll +++ b/test/Linker/module-flags-6-a.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - |& FileCheck %s +; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - 2>&1 | FileCheck %s ; Test module flags error messages. 
diff --git a/test/Linker/multiple-merged-structs.ll b/test/Linker/multiple-merged-structs.ll index 348cd89bbc..aa8204dfeb 100644 --- a/test/Linker/multiple-merged-structs.ll +++ b/test/Linker/multiple-merged-structs.ll @@ -1,19 +1,2 @@ -; RUN: echo {%bug_type = type opaque \ -; RUN: declare i32 @bug_a(%bug_type*) \ -; RUN: declare i32 @bug_b(%bug_type*) } > %t.ll -; RUN: llvm-link %t.ll %s +; RUN: llvm-link %S/Inputs/PR11464.a.ll %S/Inputs/PR11464.b.ll ; PR11464 - -%bug_type = type { %bug_type* } -%bar = type { i32 } - -define i32 @bug_a(%bug_type* %fp) nounwind uwtable { -entry: - %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0 - ret i32 0 -} - -define i32 @bug_b(%bar* %a) nounwind uwtable { -entry: - ret i32 0 -} diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll index 0d056891d5..23ba6a100f 100644 --- a/test/Linker/redefinition.ll +++ b/test/Linker/redefinition.ll @@ -2,9 +2,9 @@ ; in different modules. ; RUN: llvm-as %s -o %t.foo1.bc ; RUN: llvm-as %s -o %t.foo2.bc -; RUN: echo {define void @foo(i32 %x) { ret void }} | llvm-as -o %t.foo3.bc -; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc |& \ -; RUN: grep {symbol multiply defined} -; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc |& \ -; RUN: grep {symbol multiply defined} +; RUN: echo "define void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc +; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | \ +; RUN: grep "symbol multiply defined" +; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | \ +; RUN: grep "symbol multiply defined" define void @foo() { ret void } diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll index aa38b1264c..3a72a48aae 100644 --- a/test/Linker/weakextern.ll +++ b/test/Linker/weakextern.ll @@ -1,9 +1,9 @@ ; RUN: llvm-as < %s > %t.bc ; RUN: llvm-as < %p/testlink1.ll > %t2.bc ; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc -; RUN: llvm-dis < %t1.bc | grep {kallsyms_names = extern_weak} -; RUN: llvm-dis < %t1.bc | grep 
{MyVar = external global i32} -; RUN: llvm-dis < %t1.bc | grep {Inte = global i32} +; RUN: llvm-dis < %t1.bc | grep "kallsyms_names = extern_weak" +; RUN: llvm-dis < %t1.bc | grep "MyVar = external global i32" +; RUN: llvm-dis < %t1.bc | grep "Inte = global i32" @kallsyms_names = extern_weak global [0 x i8] ; <[0 x i8]*> [#uses=0] @MyVar = extern_weak global i32 ; <i32*> [#uses=0] diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index 35acb65d2b..c62413d0ea 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -601,6 +601,12 @@ Lforward: @ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5] @ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5] +@ With capitals + dsb SY + dsb OSHST + +@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5] +@ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5] @------------------------------------------------------------------------------ @ EOR @------------------------------------------------------------------------------ @@ -2711,10 +2717,22 @@ Lforward: wfilt yield yieldne - -@ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3] -@ CHECK: wfehi @ encoding: [0x02,0xf0,0x20,0x83] -@ CHECK: wfi @ encoding: [0x03,0xf0,0x20,0xe3] -@ CHECK: wfilt @ encoding: [0x03,0xf0,0x20,0xb3] -@ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3] -@ CHECK: yieldne @ encoding: [0x01,0xf0,0x20,0x13] + hint #5 + hint #4 + hint #3 + hint #2 + hint #1 + hint #0 + +@ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3] +@ CHECK: wfehi @ encoding: [0x02,0xf0,0x20,0x83] +@ CHECK: wfi @ encoding: [0x03,0xf0,0x20,0xe3] +@ CHECK: wfilt @ encoding: [0x03,0xf0,0x20,0xb3] +@ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3] +@ CHECK: yieldne @ encoding: [0x01,0xf0,0x20,0x13] +@ CHECK: hint #5 @ encoding: [0x05,0xf0,0x20,0xe3] +@ CHECK: sev @ encoding: [0x04,0xf0,0x20,0xe3] +@ CHECK: wfi @ encoding: [0x03,0xf0,0x20,0xe3] +@ CHECK: wfe @ encoding: [0x02,0xf0,0x20,0xe3] +@ CHECK: yield @ encoding: [0x01,0xf0,0x20,0xe3] 
+@ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3] diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s index 2104be3694..4cfe2f2111 100644 --- a/test/MC/ARM/basic-thumb2-instructions.s +++ b/test/MC/ARM/basic-thumb2-instructions.s @@ -3369,7 +3369,7 @@ _func: @ CHECK: uxth.w r7, r8 @ encoding: [0x1f,0xfa,0x88,0xf7] @------------------------------------------------------------------------------ -@ WFE/WFI/YIELD +@ WFE/WFI/YIELD/HINT @------------------------------------------------------------------------------ wfe wfi @@ -3378,6 +3378,13 @@ _func: wfelt wfige yieldlt + hint #5 + hint.w #5 + hint.w #4 + hint #3 + hint #2 + hint #1 + hint #0 @ CHECK: wfe @ encoding: [0x20,0xbf] @ CHECK: wfi @ encoding: [0x30,0xbf] @@ -3386,6 +3393,13 @@ _func: @ CHECK: wfelt @ encoding: [0x20,0xbf] @ CHECK: wfige @ encoding: [0x30,0xbf] @ CHECK: yieldlt @ encoding: [0x10,0xbf] +@ CHECK: hint #5 @ encoding: [0xaf,0xf3,0x05,0x80] +@ CHECK: hint #5 @ encoding: [0xaf,0xf3,0x05,0x80] +@ CHECK: sev.w @ encoding: [0xaf,0xf3,0x04,0x80] +@ CHECK: wfi.w @ encoding: [0xaf,0xf3,0x03,0x80] +@ CHECK: wfe.w @ encoding: [0xaf,0xf3,0x02,0x80] +@ CHECK: yield.w @ encoding: [0xaf,0xf3,0x01,0x80] +@ CHECK: nop.w @ encoding: [0xaf,0xf3,0x00,0x80] @------------------------------------------------------------------------------ diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s index 7da79c31dc..499e055013 100644 --- a/test/MC/ARM/diagnostics.s +++ b/test/MC/ARM/diagnostics.s @@ -70,8 +70,8 @@ dbg #-1 dbg #16 -@ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] @ Double-check that we're synced up with the right diagnostics. 
@ CHECK-ERRORS: dbg #16 @@ -86,8 +86,8 @@ @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] @ Out of range immediate for MOV @@ -115,8 +115,8 @@ @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] @ Shifter operand validation for PKH instructions. pkhbt r2, r2, r3, lsl #-1 @@ -315,3 +315,9 @@ @ CHECK-ERRORS: error: coprocessor option must be an immediate in range [0, 255] @ CHECK-ERRORS: ldc2 p2, c8, [r1], { -1 } @ CHECK-ERRORS: ^ + + @ Bad CPS instruction format. + cps f,#1 +@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: cps f,#1 +@ CHECK-ERRORS: ^ diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s index dcc62d33c2..08b4ecc9c7 100644 --- a/test/MC/ARM/elf-reloc-condcall.s +++ b/test/MC/ARM/elf-reloc-condcall.s @@ -4,6 +4,8 @@ bleq some_label bl some_label blx some_label + beq some_label + b some_label // OBJ: .rel.text // OBJ: 'r_offset', 0x00000000 @@ -18,6 +20,14 @@ // OBJ-NEXT: 'r_sym', 0x000004 // OBJ-NEXT: 'r_type', 0x1c +// OBJ: 'r_offset', 0x0000000c +// OBJ-NEXT: 'r_sym', 0x000004 +// OBJ-NEXT: 'r_type', 0x1d + +// OBJ: 'r_offset', 0x00000010 +// OBJ-NEXT: 'r_sym', 0x000004 +// OBJ-NEXT: 'r_type', 0x1d + // OBJ: .symtab // OBJ: Symbol 4 -// OBJ-NEXT: some_label
\ No newline at end of file +// OBJ-NEXT: some_label diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s index e38f53c6cf..d94c686e2f 100644 --- a/test/MC/ARM/thumb2-diagnostics.s +++ b/test/MC/ARM/thumb2-diagnostics.s @@ -40,5 +40,5 @@ @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction @ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction -@ CHECK-ERRORS: error: invalid operand for instruction +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] +@ CHECK-ERRORS: error: immediate operand must be in the range [0,15] diff --git a/test/MC/ARM/thumb2-mclass.s b/test/MC/ARM/thumb2-mclass.s index df4e4c93b9..b7af723620 100644 --- a/test/MC/ARM/thumb2-mclass.s +++ b/test/MC/ARM/thumb2-mclass.s @@ -86,13 +86,13 @@ @ CHECK: msr xpsr, r0 @ encoding: [0x80,0xf3,0x03,0x88] @ CHECK: msr xpsr_g, r0 @ encoding: [0x80,0xf3,0x03,0x84] @ CHECK: msr xpsr_nzcvqg, r0 @ encoding: [0x80,0xf3,0x03,0x8c] -@ CHECK: msr ipsr, r0 @ encoding: [0x80,0xf3,0x05,0x80] -@ CHECK: msr epsr, r0 @ encoding: [0x80,0xf3,0x06,0x80] -@ CHECK: msr iepsr, r0 @ encoding: [0x80,0xf3,0x07,0x80] -@ CHECK: msr msp, r0 @ encoding: [0x80,0xf3,0x08,0x80] -@ CHECK: msr psp, r0 @ encoding: [0x80,0xf3,0x09,0x80] -@ CHECK: msr primask, r0 @ encoding: [0x80,0xf3,0x10,0x80] -@ CHECK: msr basepri, r0 @ encoding: [0x80,0xf3,0x11,0x80] -@ CHECK: msr basepri_max, r0 @ encoding: [0x80,0xf3,0x12,0x80] -@ CHECK: msr faultmask, r0 @ encoding: [0x80,0xf3,0x13,0x80] -@ CHECK: msr control, r0 @ encoding: [0x80,0xf3,0x14,0x80] +@ CHECK: msr ipsr, r0 @ encoding: [0x80,0xf3,0x05,0x88] +@ CHECK: msr epsr, r0 @ encoding: [0x80,0xf3,0x06,0x88] +@ CHECK: msr iepsr, r0 @ encoding: [0x80,0xf3,0x07,0x88] +@ CHECK: msr msp, r0 @ encoding: [0x80,0xf3,0x08,0x88] +@ CHECK: msr psp, r0 @ encoding: [0x80,0xf3,0x09,0x88] +@ CHECK: msr primask, r0 @ encoding: [0x80,0xf3,0x10,0x88] +@ CHECK: 
msr basepri, r0 @ encoding: [0x80,0xf3,0x11,0x88] +@ CHECK: msr basepri_max, r0 @ encoding: [0x80,0xf3,0x12,0x88] +@ CHECK: msr faultmask, r0 @ encoding: [0x80,0xf3,0x13,0x88] +@ CHECK: msr control, r0 @ encoding: [0x80,0xf3,0x14,0x88] diff --git a/test/MC/AsmParser/macro-err1.s b/test/MC/AsmParser/macro-err1.s new file mode 100644 index 0000000000..924deb0cf6 --- /dev/null +++ b/test/MC/AsmParser/macro-err1.s @@ -0,0 +1,10 @@ +// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t +// RUN: FileCheck < %t %s + +.macro foo bar + .long \bar +.endm + +foo 42, 42 + +// CHECK: Too many arguments diff --git a/test/MC/AsmParser/macro-irp.s b/test/MC/AsmParser/macro-irp.s new file mode 100644 index 0000000000..a368b7446d --- /dev/null +++ b/test/MC/AsmParser/macro-irp.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s + +.irp reg,%eax,%ebx + pushl \reg +.endr + +// CHECK: pushl %eax +// CHECK: pushl %ebx diff --git a/test/MC/AsmParser/macro-irpc.s b/test/MC/AsmParser/macro-irpc.s new file mode 100644 index 0000000000..ea5efbfb24 --- /dev/null +++ b/test/MC/AsmParser/macro-irpc.s @@ -0,0 +1,9 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s + +.irpc foo,123 + .long \foo +.endr + +// CHECK: long 1 +// CHECK: long 2 +// CHECK: long 3 diff --git a/test/MC/AsmParser/purgem.s b/test/MC/AsmParser/purgem.s index 46004eeda3..c76c1c6d27 100644 --- a/test/MC/AsmParser/purgem.s +++ b/test/MC/AsmParser/purgem.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple i386-unknown-unknown %s |& FileCheck %s +# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s .macro foo .err diff --git a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt index a0d5944a05..f7acce9fc0 100644 --- a/test/MC/Disassembler/ARM/invalid-BFI-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-BFI-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction 
encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=60 Name=BFI Format=ARM_FORMAT_DPFRM(4) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt index d2d424c1de..356c376fed 100644 --- a/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=2249 Name=tBcc Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt index 10748e9b12..bc8b7e10b9 100644 --- a/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # invalid imod value (0b01) 0xc0 0x67 0x4 0xf1 diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt index 8146b5cb44..842a52b729 100644 --- a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding" # invalid (imod, M, iflags) combination 0x93 0x00 0x02 0xf1 diff --git a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt 
b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt index b4414859c9..83961569bd 100644 --- a/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-DMB-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1908 Name=t2DMB Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt index de042a97c6..2c6e6a7fa2 100644 --- a/test/MC/Disassembler/ARM/invalid-DSB-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-DSB-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=102 Name=DSB Format=ARM_FORMAT_MISCFRM(26) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt index 6174e92c47..4297c016e8 100644 --- a/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-IT-CBNZ-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding" # CBZ / CBNZ not allowed in IT block. 
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt index 17e25ead42..733895d6af 100644 --- a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt +++ b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep und +# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown 2>&1 | grep und # rdar://10841671 0xe3 0xbf diff --git a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt b/test/MC/Disassembler/ARM/invalid-IT-thumb.txt index 9b571b33f5..1a8ff48cd9 100644 --- a/test/MC/Disassembler/ARM/invalid-IT-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-IT-thumb.txt @@ -1,3 +1,3 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown 2>&1 | grep "potentially undefined instruction encoding" 0xff 0xbf 0x6b 0x80 0x00 0x75 diff --git a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt index 0b0426b2da..6cff09e719 100644 --- a/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=0 Name=PHI Format=(42) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt index a42b24880e..7d8c492649 100644 --- a/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-LDM-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s 
-triple=thumbv7-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding" # Writeback is not allowed is Rn is in the target register list. diff --git a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt index 6b695b95b2..68d22debb0 100644 --- a/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding" # Opcode=140 Name=LDRB_POST Format=ARM_FORMAT_LDFRM(6) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt index 7ea1b46795..4df5309b13 100644 --- a/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1930 Name=t2LDRD_PRE Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt index eef2c45db5..0cff28ad2b 100644 --- a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # XFAIL: * # LDR_PRE/POST has encoding Inst{4} = 0. 
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt index e42e0de9b9..30cb727ece 100644 --- a/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "potentially undefined instruction encoding" # Opcode=165 Name=LDR_PRE Format=ARM_FORMAT_LDFRM(6) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt index 23a0b85f36..7b7286af12 100644 --- a/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # LDR (register) has encoding Inst{4} = 0. 
0xba 0xae 0x9f 0x57 diff --git a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt index 8343d549e1..bb4b06c90a 100644 --- a/test/MC/Disassembler/ARM/invalid-MCR-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MCR-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=171 Name=MCR Format=ARM_FORMAT_BRFRM(2) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt index 235952fc35..528563acb3 100644 --- a/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=185 Name=MOVTi16 Format=ARM_FORMAT_DPFRM(4) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt index 01c1466a28..41ec53f3c7 100644 --- a/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MOVr-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=0 Name=PHI Format=(42) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt index 757d167594..e5f2a5ecbc 100644 --- 
a/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt index ba488776c0..3f4c1e5ff9 100644 --- a/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MOVs-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=0 Name=PHI Format=(42) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt b/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt index aaae6ce2e4..c20ce542a6 100644 --- a/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MRRC2-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: invalid instruction encoding 0x00 0x1a 0x50 0xfc diff --git a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt index 3765b1f5c0..901667ab90 100644 --- a/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-MSRi-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # 
Opcode=206 Name=MSRi Format=ARM_FORMAT_BRFRM(2) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt index cffd86dc1a..499aa868f9 100644 --- a/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=134 Name=LDMIA Format=ARM_FORMAT_LDSTMULFRM(10) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt index 9e16536052..7bc97d5260 100644 --- a/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-SBFX-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=271 Name=SBFX Format=ARM_FORMAT_DPFRM(4) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt index 91f3d58b4c..fe4f43aadd 100644 --- a/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=284 Name=SMLAD Format=ARM_FORMAT_MULFRM(1) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git 
a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt index fc5c711a23..eedd05cea6 100644 --- a/test/MC/Disassembler/ARM/invalid-SRS-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-SRS-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=0 Name=PHI Format=(42) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt index ca16724c7a..3d5235de55 100644 --- a/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=2313 Name=tSTMIA_UPD Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt index 400d44ce8c..f67f38e28e 100644 --- a/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-SXTB-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=390 Name=SXTBr_rot Format=ARM_FORMAT_EXTFRM(14) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt index c7cbd84487..f57c48f0e2 100644 --- 
a/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=419 Name=UMAAL Format=ARM_FORMAT_MULFRM(1) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt index ccf6d9f323..5ba7d618bf 100644 --- a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30) diff --git a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt index bab32ca171..58def05f27 100644 --- a/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=871 Name=VLD3DUPd32_UPD Format=ARM_FORMAT_NLdSt(30) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt index 887b983edd..54fcadb2a4 100644 --- a/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt +++ 
b/test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding" # core registers out of range 0xa5 0xba 0x72 0xed diff --git a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt index 7897142cae..f961c64f7d 100644 --- a/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-VQADD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=1225 Name=VQADDsv16i8 Format=ARM_FORMAT_N3Reg(37) diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt index 8ff3a2b9bd..2d2a62811a 100644 --- a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt index b9d5d33f2c..07a1c7aac6 100644 --- a/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt +++ b/test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 |& grep {invalid instruction encoding} +# RUN: 
llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=1641 Name=VST2b32_UPD Format=ARM_FORMAT_NLdSt(30) diff --git a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt index df0a642f44..c9f1cf12d1 100644 --- a/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1894 Name=t2Bcc Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt index e1f841b86d..eb415f755e 100644 --- a/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1922 Name=t2LDRBT Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt index 7c0efab383..6c13560a31 100644 --- a/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=1934 Name=t2LDREXD 
Format=ARM_FORMAT_THUMBFRM(25) diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt index a63d1214f2..7f84e08130 100644 --- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1953 Name=t2LDRSHi12 Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt index f126ff04fb..e44cf952cd 100644 --- a/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=1954 Name=t2LDRSHi8 Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt index b3daa9a429..8c0d48bd56 100644 --- a/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2PUSH-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding" # SP and PC are not allowed in the register list on STM instructions in Thumb2. 
diff --git a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt index 2198efc2d2..64ba3685cd 100644 --- a/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=2124 Name=t2STRD_PRE Format=ARM_FORMAT_THUMBFRM(25) diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt index 3f406d4948..243c11d561 100644 --- a/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 2>&1 | grep "invalid instruction encoding" # XFAIL: * # Opcode=2127 Name=t2STREXB Format=ARM_FORMAT_THUMBFRM(25) diff --git a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt index 0f9a16ee54..7a7c4a5c54 100644 --- a/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt +++ b/test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=2128 Name=t2STREXD Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt index 548ad056e6..2ad3e7df9c 100644 --- a/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt 
+++ b/test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 2>&1 | grep "invalid instruction encoding" # Opcode=2137 Name=t2STR_POST Format=ARM_FORMAT_THUMBFRM(25) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/ldrd-armv4.txt b/test/MC/Disassembler/ARM/ldrd-armv4.txt index bb87ade786..f2fff3f21c 100644 --- a/test/MC/Disassembler/ARM/ldrd-armv4.txt +++ b/test/MC/Disassembler/ARM/ldrd-armv4.txt @@ -1,5 +1,5 @@ -# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi |& FileCheck %s -check-prefix=V4 -# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi |& FileCheck %s -check-prefix=V5TE +# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=V4 +# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=V5TE # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 # ------------------------------------------------------------------------------------------------- diff --git a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt index 275bae2fa2..d5c8cbba9d 100644 --- a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0x1f 0x12 0xb0 0x00 diff --git a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt index 635b66ea43..d251eb4a30 100644 --- a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt +++ 
b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0xd1 0xf1 0x5f 0x01 diff --git a/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt b/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt index 6f1da8eb23..d0cb520e32 100644 --- a/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-AExtI-arm.txt @@ -1,5 +1,5 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s -check-prefix=CHECK-WARN -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s -check-prefix=CHECK-WARN +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK-WARN: potentially undefined # CHECK-WARN: 0x74 0x03 0xaf 0x06 diff --git a/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt b/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt index dac4390cde..554ae53076 100644 --- a/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-AI1cmp-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0x01 0x10 0x50 0x03 diff --git a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt index ed5e350c13..66073a81be 100644 --- a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially 
undefined # CHECK: 0xff 0x00 0xb9 0x00 diff --git a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt index a8f54f7127..572d84474c 100644 --- a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 # ------------------------------------------------------------------------------------------------- diff --git a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt index f7d6bc6edc..9c269537de 100644 --- a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt +++ b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt index 26b286dbf4..439aaed097 100644 --- a/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-MRRC2-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0x00 0x10 0x51 0xfc diff --git a/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt index 3e472cdbfb..d7853410d9 100644 --- a/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt +++ 
b/test/MC/Disassembler/ARM/unpredictable-MRS-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # CHECK: warning: potentially undefined # CHECK: 0x00 0xf0 0x0f 0x01 diff --git a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt index 3db86cc44f..472868fff6 100644 --- a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0x93 0x12 0x01 0x00 diff --git a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt index 5b13610949..fdfda6df83 100644 --- a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt index d7939c1918..a2a87703d0 100644 --- a/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-SEL-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0xb4 0x38 0x80 0x06 diff --git a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt 
index 8ec49cad34..741d0590ab 100644 --- a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # CHECK: warning: potentially undefined # CHECK: shadd16 r5, r7, r0 diff --git a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt index 874378ed02..832aa3ffa3 100644 --- a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt index fef6125d28..5e62802a3a 100644 --- a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s # Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt index 4c4c9abed2..85b52dd6aa 100644 --- a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 
| FileCheck %s # Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4) # 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 diff --git a/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt b/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt index 64bb171bf8..eef5d9fa97 100644 --- a/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt +++ b/test/MC/Disassembler/ARM/unpredictable-swp-arm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi 2>&1 | FileCheck %s # CHECK: potentially undefined # CHECK: 0x9f 0x10 0x03 0x01 diff --git a/test/MC/Disassembler/ARM/unpredictables-thumb.txt b/test/MC/Disassembler/ARM/unpredictables-thumb.txt index e7645f0a59..925dcd38b6 100644 --- a/test/MC/Disassembler/ARM/unpredictables-thumb.txt +++ b/test/MC/Disassembler/ARM/unpredictables-thumb.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=thumbv7 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=thumbv7 2>&1 | FileCheck %s 0x01 0x47 # CHECK: 3:1: warning: potentially undefined diff --git a/test/MC/Disassembler/X86/enhanced.txt b/test/MC/Disassembler/X86/enhanced.txt index 1922dc5eda..deff735b69 100644 --- a/test/MC/Disassembler/X86/enhanced.txt +++ b/test/MC/Disassembler/X86/enhanced.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s # CHECK: [o:jne][w: ][0-p:-][0-l:10=10] <br> 0:[RIP/112](pc)=18446744073709551606 0x0f 0x85 0xf6 0xff 0xff 0xff diff --git a/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt b/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt index 9feb54c517..31a3804dec 100644 --- a/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt +++ b/test/MC/Disassembler/X86/invalid-VEX-vvvv.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc 
--disassemble %s -triple=x86_64-apple-darwin9 2>&1 | grep "invalid instruction encoding" # This instruction would decode as movmskps if the vvvv field in the VEX prefix was all 1s. 0xc5 0xf0 0x50 0xc0 diff --git a/test/MC/Disassembler/X86/invalid-cmp-imm.txt b/test/MC/Disassembler/X86/invalid-cmp-imm.txt index bf8699b24f..7b2ea2aa06 100644 --- a/test/MC/Disassembler/X86/invalid-cmp-imm.txt +++ b/test/MC/Disassembler/X86/invalid-cmp-imm.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding} +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | grep "invalid instruction encoding" # This instruction would decode as cmpordps if the immediate byte was less than 8. 0x0f 0xc2 0xc7 0x08 diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index c0e77d0698..712c95aa72 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -725,6 +725,30 @@ # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21 +# CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 +0xc4 0xe2 0xe9 0x92 0x04 0x4f + +# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x92 0x04 0x4f + +# CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x93 0x04 0x4f + +# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 +0xc4 0x02 0x2d 0x93 0x04 0x4f + +# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +0xc4 0xe2 0xe9 0x90 0x04 0x4f + +# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x90 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x91 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +0xc4 0x02 0x2d 0x91 0x04 0x4f + # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling # CHECK: lock # CHECK-NEXT: xaddq %rcx, %rbx diff --git a/test/MC/Disassembler/X86/truncated-input.txt 
b/test/MC/Disassembler/X86/truncated-input.txt index 34cf0382a7..83be1ca5db 100644 --- a/test/MC/Disassembler/X86/truncated-input.txt +++ b/test/MC/Disassembler/X86/truncated-input.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& FileCheck %s +# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s # CHECK: warning 0x00 diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll new file mode 100644 index 0000000000..23ec53a2e2 --- /dev/null +++ b/test/MC/Mips/elf-N64.ll @@ -0,0 +1,39 @@ +; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - | elf-dump --dump-section-data | FileCheck %s + +; Check for N64 relocation production. +; +; ModuleID = '../hello.c' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v64:64:64-n32" +target triple = "mips64el-unknown-linux" + +@str = private unnamed_addr constant [12 x i8] c"hello world\00" + +define i32 @main() nounwind { +entry: +; Check that the appropriate relocations were created. 
+ +; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 +; CHECK: ('r_type3', 0x05) +; CHECK-NEXT: ('r_type2', 0x18) +; CHECK-NEXT: ('r_type', 0x07) + +; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 +; CHECK: ('r_type3', 0x06) +; CHECK-NEXT: ('r_type2', 0x18) +; CHECK-NEXT: ('r_type', 0x07) + +; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE +; CHECK: ('r_type3', 0x00) +; CHECK-NEXT: ('r_type2', 0x00) +; CHECK-NEXT: ('r_type', 0x14) + +; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE +; CHECK: ('r_type3', 0x00) +; CHECK-NEXT: ('r_type2', 0x00) +; CHECK-NEXT: ('r_type', 0x15) + + %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0)) + ret i32 0 + +} +declare i32 @puts(i8* nocapture) nounwind diff --git a/test/MC/Mips/elf-objdump.s b/test/MC/Mips/elf-objdump.s new file mode 100644 index 0000000000..6a5c2a5bf6 --- /dev/null +++ b/test/MC/Mips/elf-objdump.s @@ -0,0 +1,11 @@ +// 32 bit big endian +// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s +// 32 bit little endian +// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s +// 64 bit big endian +// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s +// 64 bit little endian +// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s + +// We just want to see if llvm-objdump works at all. +// CHECK: .text diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s index 7a79fa066b..ffc3b112e5 100644 --- a/test/MC/Mips/elf_basic.s +++ b/test/MC/Mips/elf_basic.s @@ -30,3 +30,6 @@ // CHECK-LE64: ('e_indent[EI_CLASS]', 0x02) // This is little endian. // CHECK-LE64: ('e_indent[EI_DATA]', 0x01) + +// Check that we are setting EI_OSABI to ELFOSABI_LINUX. 
+// CHECK-LE64: ('e_indent[EI_OSABI]', 0x03) diff --git a/test/MC/X86/x86-32-avx.s b/test/MC/X86/x86-32-avx.s index e13a8712f7..9a7a50687e 100644 --- a/test/MC/X86/x86-32-avx.s +++ b/test/MC/X86/x86-32-avx.s @@ -2603,11 +2603,11 @@ // CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] vcvttpd2dq %xmm1, %xmm5 -// CHECK: vcvttpd2dq %ymm2, %xmm5 +// CHECK: vcvttpd2dqy %ymm2, %xmm5 // CHECK: encoding: [0xc5,0xfd,0xe6,0xea] vcvttpd2dq %ymm2, %xmm5 -// CHECK: vcvttpd2dqx %xmm1, %xmm5 +// CHECK: vcvttpd2dq %xmm1, %xmm5 // CHECK: encoding: [0xc5,0xf9,0xe6,0xe9] vcvttpd2dqx %xmm1, %xmm5 @@ -2623,11 +2623,11 @@ // CHECK: encoding: [0xc5,0xfd,0xe6,0x08] vcvttpd2dqy (%eax), %xmm1 -// CHECK: vcvtpd2ps %ymm2, %xmm5 +// CHECK: vcvtpd2psy %ymm2, %xmm5 // CHECK: encoding: [0xc5,0xfd,0x5a,0xea] vcvtpd2ps %ymm2, %xmm5 -// CHECK: vcvtpd2psx %xmm1, %xmm5 +// CHECK: vcvtpd2ps %xmm1, %xmm5 // CHECK: encoding: [0xc5,0xf9,0x5a,0xe9] vcvtpd2psx %xmm1, %xmm5 @@ -2643,7 +2643,7 @@ // CHECK: encoding: [0xc5,0xfd,0x5a,0x08] vcvtpd2psy (%eax), %xmm1 -// CHECK: vcvtpd2dq %ymm2, %xmm5 +// CHECK: vcvtpd2dqy %ymm2, %xmm5 // CHECK: encoding: [0xc5,0xff,0xe6,0xea] vcvtpd2dq %ymm2, %xmm5 @@ -2655,7 +2655,7 @@ // CHECK: encoding: [0xc5,0xff,0xe6,0x08] vcvtpd2dqy (%eax), %xmm1 -// CHECK: vcvtpd2dqx %xmm1, %xmm5 +// CHECK: vcvtpd2dq %xmm1, %xmm5 // CHECK: encoding: [0xc5,0xfb,0xe6,0xe9] vcvtpd2dqx %xmm1, %xmm5 diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s index bd5559a528..b9943bafe7 100644 --- a/test/MC/X86/x86_64-avx-encoding.s +++ b/test/MC/X86/x86_64-avx-encoding.s @@ -3368,11 +3368,11 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] vcvttpd2dq %xmm11, %xmm10 -// CHECK: vcvttpd2dq %ymm12, %xmm10 +// CHECK: vcvttpd2dqy %ymm12, %xmm10 // CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4] vcvttpd2dq %ymm12, %xmm10 -// CHECK: vcvttpd2dqx %xmm11, %xmm10 +// CHECK: vcvttpd2dq %xmm11, %xmm10 // CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3] vcvttpd2dqx 
%xmm11, %xmm10 @@ -3388,11 +3388,11 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc5,0x7d,0xe6,0x18] vcvttpd2dqy (%rax), %xmm11 -// CHECK: vcvtpd2ps %ymm12, %xmm10 +// CHECK: vcvtpd2psy %ymm12, %xmm10 // CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4] vcvtpd2ps %ymm12, %xmm10 -// CHECK: vcvtpd2psx %xmm11, %xmm10 +// CHECK: vcvtpd2ps %xmm11, %xmm10 // CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3] vcvtpd2psx %xmm11, %xmm10 @@ -3408,7 +3408,7 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc5,0x7d,0x5a,0x18] vcvtpd2psy (%rax), %xmm11 -// CHECK: vcvtpd2dq %ymm12, %xmm10 +// CHECK: vcvtpd2dqy %ymm12, %xmm10 // CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4] vcvtpd2dq %ymm12, %xmm10 @@ -3420,7 +3420,7 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc5,0x7f,0xe6,0x18] vcvtpd2dqy (%rax), %xmm11 -// CHECK: vcvtpd2dqx %xmm11, %xmm10 +// CHECK: vcvtpd2dq %xmm11, %xmm10 // CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3] vcvtpd2dqx %xmm11, %xmm10 @@ -4121,3 +4121,35 @@ _foo: _foo2: nop vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0 + +// CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f] + vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 + +// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f] + vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f] + vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f] + vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 + +// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f] + vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 + +// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f] + vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vpgatherqd %xmm8, 
(%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x2d,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 diff --git a/test/Makefile b/test/Makefile index ac4eb7dd8c..483db23d36 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,9 +12,6 @@ DIRS = all:: check-local -# 'lit' is the default test runner. -check-local:: check-local-lit - # Include other test rules include Makefile.tests @@ -27,7 +24,6 @@ $(warning GREP_OPTIONS environment variable may interfere with test results) endif ifdef VERBOSE -RUNTESTFLAGS := $(VERBOSE) LIT_ARGS := -v else LIT_ARGS := -s -v @@ -42,7 +38,6 @@ ifdef TESTSUITE LIT_TESTSUITE := $(TESTSUITE) CLEANED_TESTSUITE := $(patsubst %/,%,$(TESTSUITE)) CLEANED_TESTSUITE := $(patsubst test/%,%,$(CLEANED_TESTSUITE)) -RUNTESTFLAGS += --tool $(CLEANED_TESTSUITE) else LIT_TESTSUITE := . endif @@ -54,8 +49,8 @@ endif # Check what to run for -all. LIT_ALL_TESTSUITES := $(LIT_TESTSUITE) -extra-lit-site-cfgs:: -.PHONY: extra-lit-site-cfgs +extra-site-cfgs:: +.PHONY: extra-site-cfgs ifneq ($(strip $(filter check-local-all,$(MAKECMDGOALS))),) ifndef TESTSUITE @@ -63,21 +58,11 @@ ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/Makefile && echo OK), OK) LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test # Force creation of Clang's lit.site.cfg. 
-clang-lit-site-cfg: FORCE +clang-site-cfg: FORCE $(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg Unit/lit.site.cfg -extra-lit-site-cfgs:: clang-lit-site-cfg -endif -endif +extra-site-cfgs:: clang-site-cfg endif - -IGNORE_TESTS := - -ifndef RUNLLVM2CPP -IGNORE_TESTS += llvm2cpp.exp endif - -ifdef IGNORE_TESTS -RUNTESTFLAGS += --ignore "$(strip $(IGNORE_TESTS))" endif # ulimits like these are redundantly enforced by the buildbots, so @@ -94,21 +79,14 @@ ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ; endif # AuroraUX endif # SunOS -ifneq ($(RUNTEST),) -check-local-dg:: site.exp - ( $(ULIMIT) \ - PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \ - $(RUNTEST) $(RUNTESTFLAGS) ) -else -check-local-dg:: site.exp - @echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM." -endif - -check-local-lit:: lit.site.cfg Unit/lit.site.cfg +check-local:: lit.site.cfg Unit/lit.site.cfg ( $(ULIMIT) \ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_TESTSUITE) ) -check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs +# This is a legacy alias dating from when both DejaGNU and lit were in use. +check-local-lit:: check-local + +check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs ( $(ULIMIT) \ $(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) ) @@ -129,44 +107,22 @@ endif FORCE: -site.exp: FORCE - @echo 'Making a new site.exp file...' - @echo '## Autogenerated by LLVM configuration.' > site.tmp - @echo '# Do not edit!' 
>> site.tmp - @echo 'set target_triplet "$(TARGET_TRIPLE)"' >> site.tmp - @echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp - @echo 'set llvmshlibdir "$(SharedLibDir)"' >>site.tmp - @echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp - @echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp - @echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp - @echo 'set srcdir "$(LLVM_SRC_ROOT)/test"' >>site.tmp - @echo 'set objdir "$(LLVM_OBJ_ROOT)/test"' >>site.tmp - @echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp - @echo 'set shlibext "$(SHLIBEXT)"' >> site.tmp - @echo 'set ocamlopt "$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml"' >> site.tmp - @echo 'set valgrind "$(VALGRIND)"' >> site.tmp - @echo 'set grep "$(GREP)"' >>site.tmp - @echo 'set gas "$(GAS)"' >>site.tmp - @echo '## All variables above are generated by configure. Do Not Edit ## ' >>site.tmp - @test ! -f site.exp || \ - sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp - @-rm -f site.bak - @test ! -f site.exp || mv site.exp site.bak - @mv site.tmp site.exp - ifeq ($(DISABLE_ASSERTIONS),1) ENABLE_ASSERTIONS=0 else ENABLE_ASSERTIONS=1 endif -lit.site.cfg: site.exp +lit.site.cfg: FORCE @echo "Making LLVM 'lit.site.cfg' file..." 
- @$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp + @$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp + @$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp @$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp @$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp - @$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp + @$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp + @$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp + @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp @@ -181,7 +137,6 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE @$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > unit.tmp @$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> unit.tmp @$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> unit.tmp - @$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> unit.tmp @$(ECHOPATH) s=@LLVM_BUILD_MODE@=$(BuildMode)=g >> unit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> unit.tmp @$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> unit.tmp diff --git a/test/Object/Inputs/trivial-object-test.elf-hexagon b/test/Object/Inputs/trivial-object-test.elf-hexagon Binary files differnew file mode 100644 index 0000000000..566fa30012 --- /dev/null +++ b/test/Object/Inputs/trivial-object-test.elf-hexagon diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test index c4b564e723..a394a23a7e 100644 --- a/test/Object/objdump-relocations.test +++ b/test/Object/objdump-relocations.test @@ -6,6 +6,8 @@ RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF-i386 RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \ RUN: | FileCheck %s 
-check-prefix ELF-x86-64 +RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \ +RUN: | FileCheck %s -check-prefix ELF-hexagon COFF-i386: .text COFF-i386: IMAGE_REL_I386_DIR32 L_.str @@ -26,3 +28,11 @@ ELF-x86-64: .text ELF-x86-64: R_X86_64_32S .rodata.str1.1 ELF-x86-64: R_X86_64_PC32 puts ELF-x86-64: R_X86_64_PC32 SomeOtherFunction + +ELF-hexagon: .text +ELF-hexagon: R_HEX_GOTREL_HI16 .main +ELF-hexagon: R_HEX_GOTREL_LO16 .main +ELF-hexagon: R_HEX_HI16 puts +ELF-hexagon: R_HEX_LO16 puts +ELF-hexagon: R_HEX_B15_PCREL testf +ELF-hexagon: R_HEX_B22_PCREL puts diff --git a/test/Other/2003-02-19-LoopInfoNestingBug.ll b/test/Other/2003-02-19-LoopInfoNestingBug.ll index 13f8351637..b807c44400 100644 --- a/test/Other/2003-02-19-LoopInfoNestingBug.ll +++ b/test/Other/2003-02-19-LoopInfoNestingBug.ll @@ -3,7 +3,7 @@ ; and instead nests it just inside loop "Top" ; ; RUN: opt < %s -analyze -loops | \ -; RUN: grep { Loop at depth 3 containing: %Inner<header><latch><exiting>} +; RUN: grep " Loop at depth 3 containing: %Inner<header><latch><exiting>" ; define void @test() { br label %Top diff --git a/test/Other/2008-10-15-MissingSpace.ll b/test/Other/2008-10-15-MissingSpace.ll index d16ea72c2b..cac696ed6f 100644 --- a/test/Other/2008-10-15-MissingSpace.ll +++ b/test/Other/2008-10-15-MissingSpace.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | not grep {void@} +; RUN: llvm-as < %s | llvm-dis | not grep "void@" ; PR2894 declare void @g() define void @f() { diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll index 40a01cc314..1d207c75c6 100644 --- a/test/Other/close-stderr.ll +++ b/test/Other/close-stderr.ll @@ -1,7 +1,5 @@ -; RUN: sh -c "\ -; RUN: opt --reject-this-option 2>&-; echo \$?; \ -; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \ -; RUN: " | FileCheck %s +; RUN: sh -c 'opt --reject-this-option 2>&-; echo $?; opt -o /dev/null /dev/null 2>&-; echo $?;' \ +; RUN: | FileCheck %s ; CHECK: {{^1$}} ; CHECK: {{^0$}} ; XFAIL: vg_leak diff --git 
a/test/Other/invalid-commandline-option.ll b/test/Other/invalid-commandline-option.ll index 60840fa010..583d4496dc 100644 --- a/test/Other/invalid-commandline-option.ll +++ b/test/Other/invalid-commandline-option.ll @@ -1,3 +1,3 @@ -; RUN: not opt --foo |& grep {Unknown command line argument} +; RUN: not opt --foo 2>&1 | grep "Unknown command line argument" ; there is no --foo diff --git a/test/Other/lint.ll b/test/Other/lint.ll index ca2b1a336a..c84f56f8f6 100644 --- a/test/Other/lint.ll +++ b/test/Other/lint.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -lint -disable-output < %s |& FileCheck %s +; RUN: opt -basicaa -lint -disable-output < %s 2>&1 | FileCheck %s target datalayout = "e-p:64:64:64" declare fastcc void @bar() diff --git a/test/Other/optimize-options.ll b/test/Other/optimize-options.ll index 5b1fe52ac1..888a78fd9d 100644 --- a/test/Other/optimize-options.ll +++ b/test/Other/optimize-options.ll @@ -1,8 +1,8 @@ -;RUN: opt -S -O1 -debug-pass=Arguments |& FileCheck %s -;RUN: opt -S -O2 -debug-pass=Arguments |& FileCheck %s -;RUN: opt -S -Os -debug-pass=Arguments |& FileCheck %s -;RUN: opt -S -Oz -debug-pass=Arguments |& FileCheck %s -;RUN: opt -S -O3 -debug-pass=Arguments |& FileCheck %s +;RUN: opt -S -O1 -debug-pass=Arguments 2>&1 | FileCheck %s +;RUN: opt -S -O2 -debug-pass=Arguments 2>&1 | FileCheck %s +;RUN: opt -S -Os -debug-pass=Arguments 2>&1 | FileCheck %s +;RUN: opt -S -Oz -debug-pass=Arguments 2>&1 | FileCheck %s +;RUN: opt -S -O3 -debug-pass=Arguments 2>&1 | FileCheck %s ; Just check that we get a non-empty set of passes for each -O opton. 
;CHECK: Pass Arguments: {{.*}} -print-module diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump index 58ca177328..69cdacde45 100755 --- a/test/Scripts/elf-dump +++ b/test/Scripts/elf-dump @@ -15,6 +15,7 @@ class Reader: self.file = open(path, "rb") self.isLSB = None self.is64Bit = None + self.isN64 = False def seek(self, pos): self.file.seek(pos) @@ -122,15 +123,28 @@ def dumpRel(f, section, dumprela = False): f.seek(section.sh_offset[0] + index * section.sh_entsize[0]) print " # Relocation %s" % index print " (('r_offset', %s)" % common_dump.HexDump(f.readWord()) - r_info = f.readWord()[0] - if f.is64Bit: - r_sym = (r_info >> 32, 32) - r_type = (r_info & 0xffffffff, 32) + + if f.isN64: + r_sym = f.read32() + r_ssym = f.read8() + r_type3 = f.read8() + r_type2 = f.read8() + r_type = f.read8() + print " ('r_sym', %s)" % common_dump.HexDump(r_sym) + print " ('r_ssym', %s)" % common_dump.HexDump(r_ssym) + print " ('r_type3', %s)" % common_dump.HexDump(r_type3) + print " ('r_type2', %s)" % common_dump.HexDump(r_type2) + print " ('r_type', %s)" % common_dump.HexDump(r_type) else: - r_sym = (r_info >> 8, 24) - r_type = (r_info & 0xff, 8) - print " ('r_sym', %s)" % common_dump.HexDump(r_sym) - print " ('r_type', %s)" % common_dump.HexDump(r_type) + r_info = f.readWord()[0] + if f.is64Bit: + r_sym = (r_info >> 32, 32) + r_type = (r_info & 0xffffffff, 32) + else: + r_sym = (r_info >> 8, 24) + r_type = (r_info & 0xff, 8) + print " ('r_sym', %s)" % common_dump.HexDump(r_sym) + print " ('r_type', %s)" % common_dump.HexDump(r_type) if dumprela: print " ('r_addend', %s)" % common_dump.HexDump(f.readWord()) print " )," @@ -166,7 +180,13 @@ def dumpELF(path, opts): f.seek(16) # Seek to end of e_ident. print "('e_type', %s)" % common_dump.HexDump(f.read16()) - print "('e_machine', %s)" % common_dump.HexDump(f.read16()) + + # Does any other architecture use N64? 
+ e_machine = f.read16() + if e_machine[0] == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit + f.isN64 = True + + print "('e_machine', %s)" % common_dump.HexDump(e_machine) print "('e_version', %s)" % common_dump.HexDump(f.read32()) print "('e_entry', %s)" % common_dump.HexDump(f.readWord()) print "('e_phoff', %s)" % common_dump.HexDump(f.readWord()) diff --git a/test/TableGen/DefmInherit.td b/test/TableGen/DefmInherit.td index 47fd81d2e7..46d3f62c6d 100644 --- a/test/TableGen/DefmInherit.td +++ b/test/TableGen/DefmInherit.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {zing = 4} | count 4 +// RUN: llvm-tblgen %s | grep "zing = 4" | count 4 // XFAIL: vg_leak class C1<int A, string B> { diff --git a/test/TableGen/LazyChange.td b/test/TableGen/LazyChange.td index 8145a3ff8d..306959ebb6 100644 --- a/test/TableGen/LazyChange.td +++ b/test/TableGen/LazyChange.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {int Y = 3} +// RUN: llvm-tblgen %s | grep "int Y = 3" // XFAIL: vg_leak class C { diff --git a/test/TableGen/ListOfList.td b/test/TableGen/ListOfList.td index 565a99cf5f..864401ec3c 100644 --- a/test/TableGen/ListOfList.td +++ b/test/TableGen/ListOfList.td @@ -1,6 +1,6 @@ // RUN llvm-tblgen %s | FileCheck %s -// RUN: llvm-tblgen %s | grep {foo} | count 1 +// RUN: llvm-tblgen %s | grep "foo" | count 1 // XFAIL: vg_leak class Base<string t> { diff --git a/test/TableGen/MultiClass.td b/test/TableGen/MultiClass.td index 04f3a56558..449c5d6c04 100644 --- a/test/TableGen/MultiClass.td +++ b/test/TableGen/MultiClass.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {zing = 4} | count 2 +// RUN: llvm-tblgen %s | grep "zing = 4" | count 2 // XFAIL: vg_leak class C1<int A, string B> { diff --git a/test/TableGen/MultiClassInherit.td b/test/TableGen/MultiClassInherit.td index 8b78bc7736..c768fff0b6 100644 --- a/test/TableGen/MultiClassInherit.td +++ b/test/TableGen/MultiClassInherit.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {zing = 4} | count 28 +// RUN: llvm-tblgen %s | 
grep "zing = 4" | count 28 // XFAIL: vg_leak class C1<int A, string B> { diff --git a/test/TableGen/Slice.td b/test/TableGen/Slice.td index 2d2822c53b..6d051d77c8 100644 --- a/test/TableGen/Slice.td +++ b/test/TableGen/Slice.td @@ -1,5 +1,5 @@ -// RUN: llvm-tblgen %s | grep {\\\[(set} | count 2 -// RUN: llvm-tblgen %s | grep {\\\[\\\]} | count 2 +// RUN: llvm-tblgen %s | grep "\[(set" | count 2 +// RUN: llvm-tblgen %s | grep "\[\]" | count 2 // XFAIL: vg_leak class ValueType<int size, int value> { diff --git a/test/TableGen/TargetInstrSpec.td b/test/TableGen/TargetInstrSpec.td index 7b611e7c17..64b706dc6a 100644 --- a/test/TableGen/TargetInstrSpec.td +++ b/test/TableGen/TargetInstrSpec.td @@ -1,5 +1,5 @@ -// RUN: llvm-tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_pd VR128:\$src1, VR128:\$src2))\\\]} | count 1 -// RUN: llvm-tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_ps VR128:\$src1, VR128:\$src2))\\\]} | count 1 +// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_pd VR128:$src1, VR128:$src2))\]' | count 1 +// RUN: llvm-tblgen %s | grep '\[(set VR128:$dst, (int_x86_sse2_add_ps VR128:$src1, VR128:$src2))\]' | count 1 // XFAIL: vg_leak class ValueType<int size, int value> { diff --git a/test/TableGen/cast.td b/test/TableGen/cast.td index 8a23eb4cc9..7948aff795 100644 --- a/test/TableGen/cast.td +++ b/test/TableGen/cast.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {add_ps} | count 3 +// RUN: llvm-tblgen %s | grep "add_ps" | count 3 // XFAIL: vg_leak class ValueType<int size, int value> { diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td index 814ae6ef93..902af25237 100644 --- a/test/TableGen/foreach.td +++ b/test/TableGen/foreach.td @@ -1,6 +1,6 @@ -// RUN: llvm-tblgen %s | grep {Jr} | count 2 -// RUN: llvm-tblgen %s | grep {Sr} | count 2 -// RUN: llvm-tblgen %s | grep {"NAME"} | count 1 +// RUN: llvm-tblgen %s | grep 'Jr' | count 2 +// RUN: llvm-tblgen %s | grep 'Sr' | count 2 +// RUN: llvm-tblgen %s | 
grep '"NAME"' | count 1 // XFAIL: vg_leak // Variables for foreach diff --git a/test/TableGen/lisp.td b/test/TableGen/lisp.td index 025aca961c..dd85ddc67c 100644 --- a/test/TableGen/lisp.td +++ b/test/TableGen/lisp.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | grep {} +// RUN: llvm-tblgen %s | grep "" // XFAIL: vg_leak class List<list<string> n> { diff --git a/test/TableGen/subst.td b/test/TableGen/subst.td index 5a73ec4f12..850ac38465 100644 --- a/test/TableGen/subst.td +++ b/test/TableGen/subst.td @@ -1,9 +1,9 @@ -// RUN: llvm-tblgen %s | grep {Smith} | count 7 -// RUN: llvm-tblgen %s | grep {Johnson} | count 2 -// RUN: llvm-tblgen %s | grep {FIRST} | count 1 -// RUN: llvm-tblgen %s | grep {LAST} | count 1 -// RUN: llvm-tblgen %s | grep {TVAR} | count 2 -// RUN: llvm-tblgen %s | grep {Bogus} | count 1 +// RUN: llvm-tblgen %s | grep "Smith" | count 7 +// RUN: llvm-tblgen %s | grep "Johnson" | count 2 +// RUN: llvm-tblgen %s | grep "FIRST" | count 1 +// RUN: llvm-tblgen %s | grep "LAST" | count 1 +// RUN: llvm-tblgen %s | grep "TVAR" | count 2 +// RUN: llvm-tblgen %s | grep "Bogus" | count 1 // XFAIL: vg_leak class Honorific<string t> { diff --git a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll index d7d5eb548a..210eb97bc1 100644 --- a/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -argpromotion -S > %t -; RUN: cat %t | grep {define.*@callee(.*i32\\*} +; RUN: cat %t | grep "define.*@callee(.*i32\*" ; PR2498 ; This test tries to convince argpromotion about promoting the load from %A + 2, diff --git a/test/Transforms/ArgumentPromotion/byval-2.ll b/test/Transforms/ArgumentPromotion/byval-2.ll index bd62c6835f..368c6896cf 100644 --- a/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/test/Transforms/ArgumentPromotion/byval-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -argpromotion 
-S | grep -F {i32* byval} | count 2 +; RUN: opt < %s -argpromotion -S | grep -F "i32* byval" | count 2 ; Argpromote + scalarrepl should change this to passing the two integers by value. %struct.ss = type { i32, i64 } diff --git a/test/Transforms/ArgumentPromotion/control-flow.ll b/test/Transforms/ArgumentPromotion/control-flow.ll index 08ca6bccd6..e4a61da45c 100644 --- a/test/Transforms/ArgumentPromotion/control-flow.ll +++ b/test/Transforms/ArgumentPromotion/control-flow.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -argpromotion -S | \ -; RUN: not grep {load i32\* null} +; RUN: not grep "load i32* null" define internal i32 @callee(i1 %C, i32* %P) { br i1 %C, label %T, label %F diff --git a/test/Transforms/ArgumentPromotion/control-flow2.ll b/test/Transforms/ArgumentPromotion/control-flow2.ll index 9a8afc32a8..2543218baf 100644 --- a/test/Transforms/ArgumentPromotion/control-flow2.ll +++ b/test/Transforms/ArgumentPromotion/control-flow2.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -argpromotion -S | \ -; RUN: grep {load i32\\* %A} +; RUN: grep "load i32\* %A" target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @callee(i1 %C, i32* %P) { diff --git a/test/Transforms/BBVectorize/metadata.ll b/test/Transforms/BBVectorize/metadata.ll new file mode 100644 index 0000000000..1e3aaa127a --- /dev/null +++ b/test/Transforms/BBVectorize/metadata.ll @@ -0,0 +1,49 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -S | FileCheck %s + +; Simple 3-pair chain with loads and stores (with fpmath) +define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1, !fpmath !2 + %arrayidx3 = getelementptr 
inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4, !fpmath !3 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +; CHECK: @test1 +; CHECK: !fpmath +; CHECK: ret void +} + +; Simple 3-pair chain with loads and stores (ints with range) +define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly { +entry: + %i0 = load i64* %a, align 8, !range !0 + %i1 = load i64* %b, align 8 + %mul = mul i64 %i0, %i1 + %arrayidx3 = getelementptr inbounds i64* %a, i64 1 + %i3 = load i64* %arrayidx3, align 8, !range !1 + %arrayidx4 = getelementptr inbounds i64* %b, i64 1 + %i4 = load i64* %arrayidx4, align 8 + %mul5 = mul i64 %i3, %i4 + store i64 %mul, i64* %c, align 8 + %arrayidx5 = getelementptr inbounds i64* %c, i64 1 + store i64 %mul5, i64* %arrayidx5, align 8 + ret void +; CHECK: @test2 +; CHECK-NOT: !range +; CHECK: ret void +} + +!0 = metadata !{i64 0, i64 2} +!1 = metadata !{i64 3, i64 5} + +!2 = metadata !{ float 5.0 } +!3 = metadata !{ float 2.5 } + diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll index 4daa5714fb..325792a5dc 100644 --- a/test/Transforms/BBVectorize/simple-sel.ll +++ b/test/Transforms/BBVectorize/simple-sel.ll @@ -1,5 +1,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-no-bools -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-NB ; Basic depth-3 chain with select define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) { @@ -27,4 +28,32 @@ 
define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 ; CHECK: ret double %R } +; Basic depth-3 chain with select (and vect. compare) +define double @test2(double %A1, double %A2, double %B1, double %B2) { +; CHECK: @test2 +; CHECK-NB: @test2 +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 + %C1 = fcmp ogt double %X1, %A1 + %C2 = fcmp ogt double %X2, %A2 +; CHECK: %C1 = fcmp ogt <2 x double> %X1, %X1.v.i0.2 +; CHECK-NB: fcmp ogt double + %Z1 = select i1 %C1, double %Y1, double %B1 + %Z2 = select i1 %C2, double %Y2, double %B2 +; CHECK: %Z1 = select <2 x i1> %C1, <2 x double> %Y1, <2 x double> %X1.v.i1.2 + %R = fmul double %Z1, %Z2 +; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 +; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 + ret double %R +; CHECK: ret double %R +} diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index 904d766bb6..88eb9c90f7 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -138,8 +138,7 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1 %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3> %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1> -; CHECK: %Z1.v.r2 = shufflevector <16 x i8> 
%Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15> -; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> +; CHECK: %Q1.v.i1 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <16 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9> %R = mul <8 x i8> %Q1, %Q2 ; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll new file mode 100644 index 0000000000..153be73f83 --- /dev/null +++ b/test/Transforms/BBVectorize/simple3.ll @@ -0,0 +1,35 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-vector-bits=192 -instcombine -gvn -S | FileCheck %s + +; Basic depth-3 chain +define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) { +; CHECK: @test1 +; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1 +; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2 +; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1 +; CHECK: 
%X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %X3 = fsub double %A3, %B3 +; CHECK: %X1 = fsub <3 x double> %X1.v.i0, %X1.v.i1 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Y3 = fmul double %X3, %A3 +; CHECK: %Y1 = fmul <3 x double> %X1, %X1.v.i0 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %Z3 = fadd double %Y3, %B3 +; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1 + %R1 = fmul double %Z1, %Z2 + %R = fmul double %R1, %Z3 +; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2 +; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1 +; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2 +; CHECK: %R = fmul double %R1, %Z1.v.r210 + ret double %R +; CHECK: ret double %R +} + diff --git a/test/Transforms/BoundsChecking/alloc_size.ll b/test/Transforms/BoundsChecking/alloc_size.ll deleted file mode 100644 index 71910bc2ca..0000000000 --- a/test/Transforms/BoundsChecking/alloc_size.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: opt < %s -bounds-checking -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -declare i64* @alloc(i32, i8, i32) -declare i32* @alloc2(i32, i32) - -; CHECK: @f1 -define void @f1(i32 %x) { - %call = tail call i32* @alloc2(i32 %x, i32 4) nounwind, !alloc_size !0 -; CHECK: trap - store i32 3, i32* %call, align 4 - ret void -} - -; CHECK: @f2 -define void @f2() { - %call1 = tail call i32* @alloc2(i32 2, i32 4) nounwind, !alloc_size !0 - %arrayidx = getelementptr i32* %call1, i64 2 -; CHECK: br label - store i32 3, i32* %arrayidx, align 4 - ret void -} - -; CHECK: @f3 -define void @f3(i32 %x, i8 %y) { - %call = tail call i64* @alloc(i32 %x, i8 %y, i32 7) nounwind, !alloc_size !1 -; CHECK: trap - store i64 27, i64* %call, align 4 - ret void -} - -; CHECK: 
@f4 -define void @f4() { - %call1 = tail call i32* @alloc2(i32 2, i32 4) nounwind, !alloc_size !0 - %arrayidx = getelementptr i32* %call1, i64 1 -; CHECK-NOT: trap - store i32 3, i32* %arrayidx, align 4 -; CHECK: ret - ret void -} - -!0 = metadata !{i32 0, i32 1} -!1 = metadata !{i32 2} diff --git a/test/Transforms/BoundsChecking/many-trap.ll b/test/Transforms/BoundsChecking/many-trap.ll index 787779d137..0bbb9592b0 100644 --- a/test/Transforms/BoundsChecking/many-trap.ll +++ b/test/Transforms/BoundsChecking/many-trap.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -bounds-checking -bounds-checking-multiple-traps -S | FileCheck %s +; RUN: opt < %s -bounds-checking -S | FileCheck %s +; RUN: opt < %s -bounds-checking -bounds-checking-single-trap -S | FileCheck -check-prefix=SINGLE %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; CHECK: @f1 @@ -7,6 +8,9 @@ define void @f1(i64 %x) nounwind { %2 = load i128* %1, align 4 %3 = load i128* %1, align 4 ret void -; CHECK: llvm.trap -; CHECK: llvm.trap +; CHECK: call void @llvm.trap() +; CHECK: call void @llvm.trap() +; CHECK-NOT: call void @llvm.trap() +; SINGLE: call void @llvm.trap() +; SINGLE-NOT: call void @llvm.trap() } diff --git a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll index b957220aa9..ca1d6180c7 100644 --- a/test/Transforms/ConstProp/2002-05-03-NotOperator.ll +++ b/test/Transforms/ConstProp/2002-05-03-NotOperator.ll @@ -5,7 +5,7 @@ ; Fix #2: The unary not instruction now no longer exists. Change to xor. 
; RUN: opt < %s -constprop -S | \ -; RUN: not grep {i32 0} +; RUN: not grep "i32 0" define i32 @test1() { %R = xor i32 123, -1 ; <i32> [#uses=1] diff --git a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll index 0b44b99f6a..d68cb26da3 100644 --- a/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll +++ b/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -constprop -S | \ -; RUN: not grep {ret i1 false} +; RUN: not grep "ret i1 false" @b = external global [2 x { }] ; <[2 x { }]*> [#uses=2] diff --git a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll index be76783e8b..4a93144d2d 100644 --- a/test/Transforms/ConstProp/2006-11-30-vector-cast.ll +++ b/test/Transforms/ConstProp/2006-11-30-vector-cast.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -constprop -S | \ -; RUN: grep {i32 -1} +; RUN: grep "i32 -1" ; RUN: opt < %s -constprop -S | \ ; RUN: not grep zeroinitializer diff --git a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll index e46a875a7c..ce66c70648 100644 --- a/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll +++ b/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {ret i1 false} +; RUN: grep "ret i1 false" define i1 @test() { %X = trunc i32 320 to i1 ; <i1> [#uses=1] ret i1 %X diff --git a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll index 3c06693b10..71db4211c5 100644 --- a/test/Transforms/ConstProp/2006-12-01-bool-casts.ll +++ b/test/Transforms/ConstProp/2006-12-01-bool-casts.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -constprop -S | \ -; RUN: grep {ret i32 -1} +; RUN: grep "ret i32 -1" ; RUN: opt < %s -constprop -S | \ -; RUN: grep {ret i32 1} +; RUN: grep "ret i32 1" define i32 @test1() { %A = sext i1 true to i32 ; <i32> [#uses=1] diff --git 
a/test/Transforms/ConstProp/2007-02-23-sdiv.ll b/test/Transforms/ConstProp/2007-02-23-sdiv.ll index 721199fc7f..75f58b573f 100644 --- a/test/Transforms/ConstProp/2007-02-23-sdiv.ll +++ b/test/Transforms/ConstProp/2007-02-23-sdiv.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llvm-dis | grep {global i32 0} +; RUN: llvm-as < %s | llvm-dis | grep "global i32 0" ; PR1215 @G = global i32 sdiv (i32 0, i32 -1) diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll index a28c9b0a2f..6d34cb17ff 100644 --- a/test/Transforms/ConstProp/2007-11-23-cttz.ll +++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -constprop -S | grep {ret i13 13} +; RUN: opt < %s -constprop -S | grep "ret i13 13" ; PR1816 declare i13 @llvm.cttz.i13(i13, i1) diff --git a/test/Transforms/ConstProp/div-zero.ll b/test/Transforms/ConstProp/div-zero.ll index f78a34fe70..a2c59d3c00 100644 --- a/test/Transforms/ConstProp/div-zero.ll +++ b/test/Transforms/ConstProp/div-zero.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 0} +; RUN: opt < %s -instcombine -S | grep "ret i32 0" ; PR4424 declare void @ext() diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll index 4ac478b0d5..6750546ba1 100644 --- a/test/Transforms/CorrelatedValuePropagation/range.ll +++ b/test/Transforms/CorrelatedValuePropagation/range.ll @@ -98,3 +98,70 @@ return: %retval.0 = phi i32 [ 42, %sw.default ], [ 4, %if.then ], [ 9, %if.end ] ret i32 %retval.0 } + +; CHECK: @test5 +define i1 @test5(i32 %c) nounwind { + %cmp = icmp slt i32 %c, 5 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %cmp1 = icmp eq i32 %c, 4 + br i1 %cmp1, label %if.end, label %if.end8 + +if.end: + ret i1 true + +if.end8: + %cmp2 = icmp eq i32 %c, 3 + %cmp3 = icmp eq i32 %c, 4 + %cmp4 = icmp eq i32 %c, 6 +; CHECK: %or = or i1 false, false + %or = or i1 %cmp3, %cmp4 +; CHECK: ret i1 %cmp2 + ret i1 %cmp2 +} 
+ +; CHECK: @test6 +define i1 @test6(i32 %c) nounwind { + %cmp = icmp ule i32 %c, 7 + br i1 %cmp, label %if.then, label %if.end + +if.then: +; CHECK: icmp eq i32 %c, 6 +; CHECK: br i1 + switch i32 %c, label %if.end [ + i32 6, label %sw.bb + i32 8, label %sw.bb + ] + +if.end: + ret i1 true + +sw.bb: + %cmp2 = icmp eq i32 %c, 6 +; CHECK: ret i1 true + ret i1 %cmp2 +} + +; CHECK: @test7 +define i1 @test7(i32 %c) nounwind { +entry: + switch i32 %c, label %sw.default [ + i32 6, label %sw.bb + i32 7, label %sw.bb + ] + +sw.bb: + ret i1 true + +sw.default: + %cmp5 = icmp eq i32 %c, 5 + %cmp6 = icmp eq i32 %c, 6 + %cmp7 = icmp eq i32 %c, 7 + %cmp8 = icmp eq i32 %c, 8 +; CHECK: %or = or i1 %cmp5, false + %or = or i1 %cmp5, %cmp6 +; CHECK: %or2 = or i1 false, %cmp8 + %or2 = or i1 %cmp7, %cmp8 + ret i1 false +} diff --git a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll index d5bd6c4df5..e5419f72f6 100644 --- a/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll +++ b/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -deadargelim -S | grep {@test(} +; RUN: opt < %s -deadargelim -S | grep "@test(" ; RUN: opt < %s -deadargelim -S | not grep dead define internal i32 @test(i32 %X, i32 %dead) { diff --git a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll index d4edce9baf..cdd893faba 100644 --- a/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll +++ b/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -deadargelim -S | not grep {ret i32 0} +; RUN: opt < %s -deadargelim -S | not grep "ret i32 0" ; PR1735 define internal i32 @test(i32 %A, ...) 
{ diff --git a/test/Transforms/DeadArgElim/canon.ll b/test/Transforms/DeadArgElim/canon.ll index 11cd482b7b..79c15a04c4 100644 --- a/test/Transforms/DeadArgElim/canon.ll +++ b/test/Transforms/DeadArgElim/canon.ll @@ -1,9 +1,9 @@ ; This test shows a few canonicalizations made by deadargelim ; RUN: opt < %s -deadargelim -S > %t ; This test should remove {} and replace it with void -; RUN: cat %t | grep {define internal void @test} +; RUN: cat %t | grep "define internal void @test" ; This test shouls replace the {i32} return value with just i32 -; RUN: cat %t | grep {define internal i32 @test2} +; RUN: cat %t | grep "define internal i32 @test2" define internal {} @test() { ret {} undef diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll index 4d6aae37a3..dc92dc9f17 100644 --- a/test/Transforms/DeadArgElim/keepalive.ll +++ b/test/Transforms/DeadArgElim/keepalive.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -deadargelim -S > %t -; RUN: grep {define internal zeroext i32 @test1() nounwind} %t -; RUN: grep {define internal <{ i32, i32 }> @test2} %t +; RUN: grep "define internal zeroext i32 @test1() nounwind" %t +; RUN: grep "define internal <{ i32, i32 }> @test2" %t %Ty = type <{ i32, i32 }> diff --git a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll index 7ef5f06f06..f38c03acca 100644 --- a/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll +++ b/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q} -; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} +; RUN: opt < %s -functionattrs -S | not grep "nocapture *%%q" +; RUN: opt < %s -functionattrs -S | grep "nocapture *%%p" define i32* @a(i32** %p) { %tmp = load i32** %p diff --git a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll index 9983374b15..7e9c982de5 100644 --- 
a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll +++ b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | not grep {tmp10 =} +; RUN: opt < %s -basicaa -gvn -S | not grep "tmp10 =" %struct.INT2 = type { i32, i32 } @blkshifts = external global %struct.INT2* ; <%struct.INT2**> [#uses=2] diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll index f2c001296f..5018a0747a 100644 --- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll +++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | grep {tmp47 = phi i32 } +; RUN: opt < %s -basicaa -gvn -S | grep "tmp47 = phi i32 " %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } @debug = external constant i32 ; <i32*> [#uses=0] diff --git a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll index a570e3571e..13419d19e1 100644 --- a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll +++ b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | not grep {tmp701 =} +; RUN: opt < %s -basicaa -gvn -S | not grep "tmp701 =" @img_width = external global i16 ; <i16*> [#uses=2] diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll index 407940b87d..4f07868a1c 100644 --- a/test/Transforms/GVN/2008-07-02-Unreachable.ll +++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | grep {ret i8 \[%\]tmp3} +; RUN: opt < %s -basicaa -gvn -S | grep "ret i8 [%]tmp3" ; PR2503 @g_3 = external global i8 ; <i8*> [#uses=2] diff --git a/test/Transforms/GVN/basic.ll b/test/Transforms/GVN/basic.ll index 1decafac8c..6f4aace45f 100644 --- a/test/Transforms/GVN/basic.ll +++ b/test/Transforms/GVN/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -gvn -S | not grep {%z2 =} +; RUN: opt < %s -gvn -S | not grep "%z2 =" 
define i32 @main() { block1: diff --git a/test/Transforms/GVN/calls-readonly.ll b/test/Transforms/GVN/calls-readonly.ll index 97ec91512e..a4777401b1 100644 --- a/test/Transforms/GVN/calls-readonly.ll +++ b/test/Transforms/GVN/calls-readonly.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | grep {call.*strlen} | count 1 +; RUN: opt < %s -basicaa -gvn -S | grep "call.*strlen" | count 1 ; Should delete the second call to strlen even though the intervening strchr call exists. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/GVN/load-constant-mem.ll b/test/Transforms/GVN/load-constant-mem.ll index 314c8069ca..a7dacea6b5 100644 --- a/test/Transforms/GVN/load-constant-mem.ll +++ b/test/Transforms/GVN/load-constant-mem.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -instcombine -S | grep {ret i32 0} +; RUN: opt < %s -basicaa -gvn -instcombine -S | grep "ret i32 0" ; PR4189 @G = external constant [4 x i32] diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/local-pre.ll index 5f03984653..1d0dadfbe0 100644 --- a/test/Transforms/GVN/local-pre.ll +++ b/test/Transforms/GVN/local-pre.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -gvn -enable-pre -S | grep {b.pre} +; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre" define i32 @main(i32 %p) { block1: diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll index dba9d81405..afcb7fe3bb 100644 --- a/test/Transforms/GVN/nonescaping-malloc.ll +++ b/test/Transforms/GVN/nonescaping-malloc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -stats -disable-output |& grep {Number of loads deleted} +; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted" ; rdar://7363102 ; GVN should be able to eliminate load %tmp22.i, because it is redundant with diff --git a/test/Transforms/GVN/pre-basic-add.ll 
b/test/Transforms/GVN/pre-basic-add.ll index c13099fe73..4bde05c338 100644 --- a/test/Transforms/GVN/pre-basic-add.ll +++ b/test/Transforms/GVN/pre-basic-add.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -gvn -enable-pre -S | grep {.pre} +; RUN: opt < %s -gvn -enable-pre -S | grep ".pre" @H = common global i32 0 ; <i32*> [#uses=2] @G = common global i32 0 ; <i32*> [#uses=1] diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll index 479724063e..e7dc9c423f 100644 --- a/test/Transforms/GVN/rle-must-alias.ll +++ b/test/Transforms/GVN/rle-must-alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 } +; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 " ; GVN should eliminate the fully redundant %9 GEP which ; allows DEAD to be removed. This is PR3198. diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll index c6cd1fdc00..71aa548ab1 100644 --- a/test/Transforms/GVN/rle-semidominated.ll +++ b/test/Transforms/GVN/rle-semidominated.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -gvn -S | grep {DEAD = phi i32 } +; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 " define i32 @main(i32* %p) { block1: diff --git a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll index 82abc8fe54..7c07d5d9a2 100644 --- a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll +++ b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -globalopt -S | grep {16 x .31 x double.. zeroinitializer} +; RUN: opt < %s -globalopt -S | grep "16 x .31 x double.. zeroinitializer" ; The 'X' indices could be larger than 31. Do not SROA the outer indices of this array. 
@mm = internal global [16 x [31 x double]] zeroinitializer, align 32 diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll index 588d5c9a68..08b2cb1eb6 100644 --- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll +++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -globalopt -S | grep {load volatile} +; RUN: opt < %s -globalopt -S | grep "load volatile" @t0.1441 = internal global double 0x3FD5555555555555, align 8 ; <double*> [#uses=1] define double @foo() nounwind { diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll index 5b06fea5d9..d58becd530 100644 --- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll +++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll @@ -2,9 +2,9 @@ ; alignments. Elements 0 and 2 must be 16-byte aligned, and element ; 1 must be at least 8 byte aligned (but could be more). 
-; RUN: opt < %s -globalopt -S | grep {@G.0 = internal unnamed_addr global .*align 16} -; RUN: opt < %s -globalopt -S | grep {@G.1 = internal unnamed_addr global .*align 8} -; RUN: opt < %s -globalopt -S | grep {@G.2 = internal unnamed_addr global .*align 16} +; RUN: opt < %s -globalopt -S | grep "@G.0 = internal unnamed_addr global .*align 16" +; RUN: opt < %s -globalopt -S | grep "@G.1 = internal unnamed_addr global .*align 8" +; RUN: opt < %s -globalopt -S | grep "@G.2 = internal unnamed_addr global .*align 16" ; rdar://5891920 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll index c4b6e52e71..e76c44dbd0 100644 --- a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll +++ b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -globalopt -S | grep {phi.*@head} +; RUN: opt < %s -globalopt -S | grep "phi.*@head" ; PR3321 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll index 3154856574..0f3efa09a1 100644 --- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll +++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -globalopt -stats -disable-output |& grep "1 globalopt - Number of global vars shrunk to booleans" +; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans" @Stop = internal global i32 0 ; <i32*> [#uses=3] diff --git a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll index d645ce4943..059af1cfea 100644 --- 
a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll +++ b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -globalopt -S | grep {@X = internal unnamed_addr global i32} +; RUN: opt < %s -globalopt -S | grep "@X = internal unnamed_addr global i32" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin7" @X = internal global i32* null ; <i32**> [#uses=2] diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll index 099c607509..be13a98118 100644 --- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll +++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -globalopt -S | \ -; RUN: grep {internal fastcc float @foo} +; RUN: grep "internal fastcc float @foo" define internal float @foo() { ret float 0.000000e+00 diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll index c8d8e7674d..cad5a91488 100644 --- a/test/Transforms/GlobalOpt/deadglobal.ll +++ b/test/Transforms/GlobalOpt/deadglobal.ll @@ -1,9 +1,25 @@ -; RUN: opt < %s -globalopt -S | not grep internal +; RUN: opt < %s -globalopt -S | FileCheck %s -@G = internal global i32 123 ; <i32*> [#uses=1] +@G1 = internal global i32 123 ; <i32*> [#uses=1] -define void @foo() { - store i32 1, i32* @G +; CHECK-NOT: @G1 +; CHECK: @G2 +; CHECK-NOT: @G3 + +define void @foo1() { +; CHECK: define void @foo +; CHECK-NEXT: ret + store i32 1, i32* @G1 + ret void +} + +@G2 = linkonce_odr constant i32 42 + +define void @foo2() { +; CHECK: define void @foo2 +; CHECK-NEXT: store + store i32 1, i32* @G2 ret void } +@G3 = linkonce_odr constant i32 42 diff --git a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll index 1e0db6a998..cc655e9a2d 100644 --- 
a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll +++ b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -globalopt -S > %t -; RUN: grep {@Y = internal unnamed_addr global \\\[3 x \[%\]struct.X\\\] zeroinitializer} %t +; RUN: grep "@Y = internal unnamed_addr global \[3 x [%]struct.X\] zeroinitializer" %t ; RUN: grep load %t | count 6 -; RUN: grep {add i32 \[%\]a, \[%\]b} %t | count 3 +; RUN: grep "add i32 [%]a, [%]b" %t | count 3 ; globalopt should not sra the global, because it can't see the index. diff --git a/test/Transforms/GlobalOpt/heap-sra-phi.ll b/test/Transforms/GlobalOpt/heap-sra-phi.ll index 6188e5af98..123ad851f7 100644 --- a/test/Transforms/GlobalOpt/heap-sra-phi.ll +++ b/test/Transforms/GlobalOpt/heap-sra-phi.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -globalopt -S | grep {tmp.f1 = phi i32. } -; RUN: opt < %s -globalopt -S | grep {tmp.f0 = phi i32. } +; RUN: opt < %s -globalopt -S | grep "tmp.f1 = phi i32. " +; RUN: opt < %s -globalopt -S | grep "tmp.f0 = phi i32. " target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.foo = type { i32, i32 } diff --git a/test/Transforms/GlobalOpt/integer-bool.ll b/test/Transforms/GlobalOpt/integer-bool.ll index 59403b18d9..5a34a9c4da 100644 --- a/test/Transforms/GlobalOpt/integer-bool.ll +++ b/test/Transforms/GlobalOpt/integer-bool.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -globalopt -instcombine | \ -; RUN: llvm-dis | grep {ret i1 true} +; RUN: llvm-dis | grep "ret i1 true" ;; check that global opt turns integers that only hold 0 or 1 into bools. 
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll index 94e07a0848..dcfe009e33 100644 --- a/test/Transforms/GlobalOpt/memcpy.ll +++ b/test/Transforms/GlobalOpt/memcpy.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -globalopt -S | \ -; RUN: grep {G1 = internal unnamed_addr constant} +; RUN: grep "G1 = internal unnamed_addr constant" @G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1] diff --git a/test/Transforms/GlobalOpt/storepointer-compare.ll b/test/Transforms/GlobalOpt/storepointer-compare.ll index 2f5ae869b7..09e20a8adb 100644 --- a/test/Transforms/GlobalOpt/storepointer-compare.ll +++ b/test/Transforms/GlobalOpt/storepointer-compare.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -globalopt -S | \ -; RUN: grep {call void @Actual} +; RUN: grep "call void @Actual" ; Check that a comparison does not prevent an indirect call from being made ; direct. The global will still remain, but indirect call elim is still good. 
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll index be02821227..ee75058731 100644 --- a/test/Transforms/GlobalOpt/unnamed-addr.ll +++ b/test/Transforms/GlobalOpt/unnamed-addr.ll @@ -4,17 +4,31 @@ @b = internal global i32 0, align 4 @c = internal global i32 0, align 4 @d = internal constant [4 x i8] c"foo\00", align 1 +@e = linkonce_odr global i32 0 ; CHECK: @a = internal global i32 0, align 4 ; CHECK: @b = internal global i32 0, align 4 ; CHECK: @c = internal unnamed_addr global i32 0, align 4 ; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1 +; CHECK: @e = linkonce_odr global i32 0 + +define i32 @get_e() { + %t = load i32* @e + ret i32 %t +} + +define void @set_e(i32 %x) { + store i32 %x, i32* @e + ret void +} define i1 @bah(i64 %i) nounwind readonly optsize ssp { entry: %arrayidx4 = getelementptr inbounds [4 x i8]* @d, i64 0, i64 %i %tmp5 = load i8* %arrayidx4, align 1 - %cmp = icmp eq i8 %tmp5, 42 + %array0 = bitcast [4 x i8]* @d to i8* + %tmp6 = load i8* %array0, align 1 + %cmp = icmp eq i8 %tmp5, %tmp6 ret i1 %cmp } diff --git a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll index 66403363bf..54a65d61da 100644 --- a/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll +++ b/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -ipconstprop -S | grep {ret i32 %r} +; RUN: opt < %s -ipconstprop -S | grep "ret i32 %r" ; Should not propagate the result of a weak function. 
; PR2411 diff --git a/test/Transforms/IPConstantProp/return-argument.ll b/test/Transforms/IPConstantProp/return-argument.ll index f4b7018222..2a14f05985 100644 --- a/test/Transforms/IPConstantProp/return-argument.ll +++ b/test/Transforms/IPConstantProp/return-argument.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -ipconstprop -S > %t -; RUN: cat %t | grep {store i32 %Z, i32\\* %Q} -; RUN: cat %t | grep {add i32 1, 3} +; RUN: cat %t | grep "store i32 %Z, i32\* %Q" +; RUN: cat %t | grep "add i32 1, 3" ;; This function returns its second argument on all return statements define internal i32* @incdec(i1 %C, i32* %V) { diff --git a/test/Transforms/IPConstantProp/return-constant.ll b/test/Transforms/IPConstantProp/return-constant.ll index ff15df7388..499d383295 100644 --- a/test/Transforms/IPConstantProp/return-constant.ll +++ b/test/Transforms/IPConstantProp/return-constant.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -ipconstprop -instcombine | \ -; RUN: llvm-dis | grep {ret i1 true} | count 2 +; RUN: llvm-dis | grep "ret i1 true" | count 2 define internal i32 @foo(i1 %C) { br i1 %C, label %T, label %F diff --git a/test/Transforms/IPConstantProp/return-constants.ll b/test/Transforms/IPConstantProp/return-constants.ll index 2cd99fe3b7..be2ca71c55 100644 --- a/test/Transforms/IPConstantProp/return-constants.ll +++ b/test/Transforms/IPConstantProp/return-constants.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -ipconstprop -S > %t ;; Check that the 21 constants got propagated properly -; RUN: cat %t | grep {%M = add i32 21, 21} +; RUN: cat %t | grep "%M = add i32 21, 21" ;; Check that the second return values didn't get propagated -; RUN: cat %t | grep {%N = add i32 %B, %D} +; RUN: cat %t | grep "%N = add i32 %B, %D" %0 = type { i32, i32 } diff --git a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll index 1ba69826fa..edeead1648 100644 --- a/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll +++ 
b/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -indvars -S | \ -; RUN: grep {ret i32 152} +; RUN: grep "ret i32 152" define i32 @main() { entry: diff --git a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll index 1bbc631080..c4e6cd4b34 100644 --- a/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll +++ b/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll @@ -1,6 +1,6 @@ ; PR726 ; RUN: opt < %s -indvars -S | \ -; RUN: grep {ret i32 27} +; RUN: grep "ret i32 27" ; Make sure to compute the right exit value based on negative strides. diff --git a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll index 268b8d1a7e..6366c8c051 100644 --- a/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll +++ b/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll @@ -1,5 +1,5 @@ ; PR1015 -; RUN: opt < %s -indvars -S | not grep {ret i32 0} +; RUN: opt < %s -indvars -S | not grep "ret i32 0" target datalayout = "e-p:32:32" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll index dd400beaa5..b461566813 100644 --- a/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll +++ b/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | not grep {sext} +; RUN: opt < %s -indvars -S | not grep "sext" ; ModuleID = '<stdin>' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64" target triple = "x86_64-apple-darwin9.6" diff --git a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll index 55e8a5073c..0722d89585 100644 --- 
a/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll +++ b/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -instcombine -S | not grep {\[sz\]ext} +; RUN: opt < %s -indvars -instcombine -S | not grep "[sz]ext" ; ModuleID = '<stdin>' ;extern int *a, *b, *c, *d, *e, *f; /* 64 bit */ ;extern int K[256]; diff --git a/test/Transforms/IndVarSimplify/eliminate-max.ll b/test/Transforms/IndVarSimplify/eliminate-max.ll index c25bd0e354..98510eaed3 100644 --- a/test/Transforms/IndVarSimplify/eliminate-max.ll +++ b/test/Transforms/IndVarSimplify/eliminate-max.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -indvars | grep {= icmp} | count 3 +; RUN: opt < %s -S -indvars | grep "= icmp" | count 3 ; PR4914.ll ; Indvars should be able to do range analysis and eliminate icmps. diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll index c3619f640b..e51a3410e3 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -indvars -S \ -; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]} +; RUN: | grep "%b.1 = phi i32 [ 2, %bb ], [ 1, %bb2 ]" ; ; This loop has multiple exits, and the value of %b1 depends on which ; exit is taken. Indvars should correctly compute the exit values. diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll index 9f3bcaf21b..21fb7ef931 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -indvars -S > %t -; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t -; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t +; RUN: grep "[%]tmp7 = icmp eq i8 -28, -28" %t +; RUN: grep "[%]tmp8 = icmp eq i8 63, 63" %t ; PR4477 ; Indvars should compute the exit values in loop. 
; diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll index 65c66f7f5a..0c1b590c4f 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_3.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_3.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | grep {ret i32 600000} +; RUN: opt < %s -indvars -S | grep "ret i32 600000" ; PR1179 define i32 @foo() { diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll index e4b642c7f5..d7eb4063b9 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_4.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_4.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | grep {ret i32 9900} +; RUN: opt < %s -indvars -S | grep "ret i32 9900" ; PR1179 define i32 @test4() { diff --git a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll index 80b961ac7c..38f95bf1f5 100644 --- a/test/Transforms/IndVarSimplify/loop_evaluate_5.ll +++ b/test/Transforms/IndVarSimplify/loop_evaluate_5.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | grep {120, %bb2.bb3_crit_edge} +; RUN: opt < %s -indvars -S | grep "120, %bb2.bb3_crit_edge" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-pc-linux-gnu" diff --git a/test/Transforms/IndVarSimplify/shrunk-constant.ll b/test/Transforms/IndVarSimplify/shrunk-constant.ll index 271f8edf19..45297d6246 100644 --- a/test/Transforms/IndVarSimplify/shrunk-constant.ll +++ b/test/Transforms/IndVarSimplify/shrunk-constant.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep {\\--> (zext i4 {-7,+,-8}<%loop> to i32)} +; RUN: | grep "\--> (zext i4 {-7,+,-8}<%loop> to i32)" define fastcc void @foo() nounwind { entry: diff --git a/test/Transforms/Inline/2007-04-15-InlineEH.ll b/test/Transforms/Inline/2007-04-15-InlineEH.ll index 
8fbcf929d0..b114537490 100644 --- a/test/Transforms/Inline/2007-04-15-InlineEH.ll +++ b/test/Transforms/Inline/2007-04-15-InlineEH.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -S | not grep {invoke void asm} +; RUN: opt < %s -inline -S | not grep "invoke void asm" ; PR1335 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/Transforms/Inline/casts.ll b/test/Transforms/Inline/casts.ll index 166185a545..a7b051b067 100644 --- a/test/Transforms/Inline/casts.ll +++ b/test/Transforms/Inline/casts.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -S | grep {ret i32 1} +; RUN: opt < %s -inline -S | grep "ret i32 1" ; ModuleID = 'short.opt.bc' define i32 @testBool(i1 %X) { diff --git a/test/Transforms/Inline/delete-call.ll b/test/Transforms/Inline/delete-call.ll index 3505608b82..7716d6a47b 100644 --- a/test/Transforms/Inline/delete-call.ll +++ b/test/Transforms/Inline/delete-call.ll @@ -1,5 +1,5 @@ -; RUN: opt %s -S -inline -functionattrs -stats |& grep {Number of call sites deleted, not inlined} -; RUN: opt %s -S -inline -stats |& grep {Number of functions inlined} +; RUN: opt %s -S -inline -functionattrs -stats 2>&1 | grep "Number of call sites deleted, not inlined" +; RUN: opt %s -S -inline -stats 2>&1 | grep "Number of functions inlined" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.8" diff --git a/test/Transforms/Inline/externally_available.ll b/test/Transforms/Inline/externally_available.ll index 08b56385ac..07274e7ecd 100644 --- a/test/Transforms/Inline/externally_available.ll +++ b/test/Transforms/Inline/externally_available.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -inline -constprop -S > %t ; RUN: not grep test_function %t -; RUN: grep {ret i32 5} %t +; RUN: grep "ret i32 5" %t ; test_function should not be emitted to the .s file. 
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll index 1f34113a18..e07752396a 100644 --- a/test/Transforms/Inline/inline-invoke-tail.ll +++ b/test/Transforms/Inline/inline-invoke-tail.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -inline -S | not grep {tail call void @llvm.memcpy.i32} +; RUN: opt < %s -inline -S | not grep "tail call void @llvm.memcpy.i32" ; PR3550 define internal void @foo(i32* %p, i32* %q) { diff --git a/test/Transforms/Inline/inline_prune.ll b/test/Transforms/Inline/inline_prune.ll index 658a422540..4c1574d83c 100644 --- a/test/Transforms/Inline/inline_prune.ll +++ b/test/Transforms/Inline/inline_prune.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -inline -S | \ -; RUN: not grep {callee\[12\](} +; RUN: not grep "callee[12](" ; RUN: opt < %s -inline -S | not grep mul define internal i32 @callee1(i32 %A, i32 %B) { diff --git a/test/Transforms/Inline/invoke_test-1.ll b/test/Transforms/Inline/invoke_test-1.ll index e0e6d600bb..922351fd46 100644 --- a/test/Transforms/Inline/invoke_test-1.ll +++ b/test/Transforms/Inline/invoke_test-1.ll @@ -2,7 +2,7 @@ ; instructions ; RUN: opt < %s -inline -S | \ -; RUN: not grep {call\[^e\]} +; RUN: not grep "call[^e]" declare void @might_throw() diff --git a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll index 1154bb481d..4233797b6d 100644 --- a/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll +++ b/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {ret i1 false} +; RUN: grep "ret i1 false" define i1 @test(i1 %V) { %Y = icmp ult i1 %V, false ; <i1> [#uses=1] diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll index 8169d2127f..d17db8d7ea 100644 --- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll +++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll @@ -1,5 
+1,5 @@ ; RUN: opt < %s -instcombine -mem2reg -S | \ -; RUN: not grep {i32 1} +; RUN: not grep "i32 1" ; When propagating the load through the select, make sure that the load is ; inserted where the original load was, not where the select is. Not doing diff --git a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll index e646edf029..0d5fc810a8 100644 --- a/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll +++ b/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -mem2reg -simplifycfg | \ -; RUN: llvm-dis | grep -v store | not grep {i32 1} +; RUN: llvm-dis | grep -v store | not grep "i32 1" ; Test to make sure that instcombine does not accidentally propagate the load ; into the PHI, which would break the program. diff --git a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll index 38553d7988..02bc043da0 100644 --- a/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll +++ b/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: not grep {ret i1 false} +; RUN: not grep "ret i1 false" define i1 @test(i64 %tmp.169) { %tmp.1710 = lshr i64 %tmp.169, 1 ; <i64> [#uses=1] diff --git a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll index 3d887ddad0..0a513c647c 100644 --- a/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll +++ b/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {ret i1 true} +; RUN: grep "ret i1 true" ; PR586 @g_07918478 = external global i32 ; <i32*> [#uses=1] diff --git a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll index 5a74bd2ab7..295006ca23 100644 --- 
a/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll +++ b/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {icmp sgt} +; RUN: grep "icmp sgt" ; END. target datalayout = "e-p:32:32" target triple = "i686-pc-linux-gnu" diff --git a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll index c3700a00c4..0c8eeceec8 100644 --- a/test/Transforms/InstCombine/2006-12-15-Range-Test.ll +++ b/test/Transforms/InstCombine/2006-12-15-Range-Test.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -instcombine -S | \ ; RUN: grep icmp | count 1 ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {icmp ugt} | count 1 +; RUN: grep "icmp ugt" | count 1 ; END. target datalayout = "e-p:32:32" diff --git a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll index e5238a577d..635a09ca93 100644 --- a/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll +++ b/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp ugt} +; RUN: opt < %s -instcombine -S | grep "icmp ugt" ; PR1107 ; PR1940 diff --git a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll index d2d215fa86..4fcfd264f4 100644 --- a/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll +++ b/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {fcmp uno.*0.0} +; RUN: opt < %s -instcombine -S | grep "fcmp uno.*0.0" ; PR1111 define i1 @test(double %X) { %tmp = fcmp une double %X, %X diff --git a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll index bd15dce11a..4d1b982f67 100644 --- a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll +++ b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll @@ -1,4 +1,4 @@ -; RUN: 
opt < %s -instcombine -S | grep {ugt.*, 1} +; RUN: opt < %s -instcombine -S | grep "ugt.*, 1" define i1 @test(i32 %tmp1030) { %tmp1037 = icmp ne i32 %tmp1030, 40 ; <i1> [#uses=1] diff --git a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll index 05891a203a..e2bebecded 100644 --- a/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll +++ b/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -instcombine -mem2reg -S | grep {%A = alloca} +; RUN: opt < %s -instcombine -mem2reg -S | grep "%A = alloca" ; RUN: opt < %s -instcombine -mem2reg -S | \ -; RUN: not grep {%B = alloca} +; RUN: not grep "%B = alloca" ; END. ; Ensure that instcombine doesn't sink the loads in entry/cond_true into diff --git a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll index 109e4a217f..826d68aefc 100644 --- a/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll +++ b/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp sle} +; RUN: opt < %s -instcombine -S | grep "icmp sle" ; PR1244 define i1 @test(i32 %c.3.i, i32 %d.292.2.i) { diff --git a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll index ca93af3a69..719da7083a 100644 --- a/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll +++ b/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll @@ -1,5 +1,5 @@ ; For PR1248 -; RUN: opt < %s -instcombine -S | grep {ugt i32 .*, 11} +; RUN: opt < %s -instcombine -S | grep "ugt i32 .*, 11" define i1 @test(i32 %tmp6) { %tmp7 = sdiv i32 %tmp6, 12 ; <i32> [#uses=1] icmp ne i32 %tmp7, -6 ; <i1>:1 [#uses=1] diff --git a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll index c79400413f..7e9c9e28c9 100644 --- 
a/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll +++ b/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll @@ -1,6 +1,6 @@ ; PR1271 ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {icmp eq i32 .tmp.*, 2146435072} +; RUN: grep "icmp eq i32 .tmp.*, 2146435072" %struct..0anon = type { i32, i32 } %struct..1anon = type { double } diff --git a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll index 807efcf29f..c4070a1b1d 100644 --- a/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll +++ b/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll @@ -1,6 +1,6 @@ ; PR1271 ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {ashr exact i32 %.mp137, 2} +; RUN: grep "ashr exact i32 %.mp137, 2" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-pc-linux-gnu" diff --git a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll index 15988b6dd9..eb0c364bfa 100644 --- a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll +++ b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {call.*sret} +; RUN: opt < %s -instcombine -S | grep "call.*sret" ; Make sure instcombine doesn't drop the sret attribute. 
define void @blah(i16* %tmp10) { diff --git a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll index 62b93513b7..082b2155a0 100644 --- a/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll +++ b/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ashr} +; RUN: opt < %s -instcombine -S | grep "ashr" ; PR1499 define void @av_cmp_q_cond_true(i32* %retval, i32* %tmp9, i64* %tmp10) { diff --git a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll index af539c12a3..b2b04d6bd2 100644 --- a/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll +++ b/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 true} +; RUN: opt < %s -instcombine -S | grep "ret i1 true" ; rdar://5278853 define i1 @test(i32 %tmp468) { diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll index 4c5c367bcf..95a445c543 100644 --- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll +++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {call.*stacksave} +; RUN: opt < %s -instcombine -S | grep "call.*stacksave" ; PR1745 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll index 5282739d5c..6b83dd982d 100644 --- a/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll +++ b/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 1} +; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 1" 
; PR1800 define i1 @test(i32 %In) { diff --git a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll index 6420537b9d..89f867252b 100644 --- a/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll +++ b/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll @@ -1,6 +1,6 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i686-pc-linux-gnu" -; RUN: opt < %s -instcombine -S | not grep {ret i1 0} +; RUN: opt < %s -instcombine -S | not grep "ret i1 0" ; PR1850 define i1 @test() { diff --git a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll index cc89f6dd20..3745e872ee 100644 --- a/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll +++ b/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {add} | count 1 +; RUN: opt < %s -instcombine -S | grep "add" | count 1 define i32 @foo(i32 %a) { entry: diff --git a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll index 28a94ce07a..1f9c47c4f1 100644 --- a/test/Transforms/InstCombine/2008-01-29-AddICmp.ll +++ b/test/Transforms/InstCombine/2008-01-29-AddICmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | not grep {a.off} +; RUN: opt < %s -instcombine -S | not grep "a.off" ; PR1949 define i1 @test1(i32 %a) { diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll index af61c150a7..917d3d9436 100644 --- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll +++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i.* 0} | count 2 +; RUN: opt < %s -instcombine -S | grep "ret i.* 0" | count 2 ; PR2048 define i32 @i(i32 %a) { diff --git 
a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll index d26dec11e2..854f8cb0b5 100644 --- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll +++ b/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {sdiv i8 \%a, 9} +; RUN: opt < %s -instcombine -S | grep "sdiv i8 \%a, 9" ; PR2048 define i8 @i(i8 %a) { diff --git a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll index da7e49ee84..0fa4d715f2 100644 --- a/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll +++ b/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {16} | count 1 +; RUN: opt < %s -instcombine -S | grep "16" | count 1 define i8* @bork(i8** %qux) { %tmp275 = load i8** %qux, align 1 diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll index de08c32fb4..dba6cdb565 100644 --- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll +++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store volatile} +; RUN: opt < %s -instcombine -S | grep "store volatile" define void @test() { %votf = alloca <4 x float> ; <<4 x float>*> [#uses=1] diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll index 1286e3d63b..fd0217e9f4 100644 --- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll +++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2 +; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" 
target triple = "i386-apple-darwin8" @g_1 = internal global i32 0 ; <i32*> [#uses=3] diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll index ebbd3a743f..8022414d6f 100644 --- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll +++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2 +; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2 ; PR2262 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll index bbd004213d..7a1c844741 100644 --- a/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll +++ b/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store i8} | count 3 +; RUN: opt < %s -instcombine -S | grep "store i8" | count 3 ; PR2297 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll index b34fc1e991..a0e95a9398 100644 --- a/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll +++ b/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 false} | count 2 +; RUN: opt < %s -instcombine -S | grep "ret i1 false" | count 2 ; PR2329 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" diff --git a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll 
b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll index 2de5af7357..acb259be5e 100644 --- a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll +++ b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 false} +; RUN: opt < %s -instcombine -S | grep "ret i1 false" ; PR2359 define i1 @f(i8* %x) { entry: diff --git a/test/Transforms/InstCombine/2008-05-31-AddBool.ll b/test/Transforms/InstCombine/2008-05-31-AddBool.ll index 541669365b..ed2069041d 100644 --- a/test/Transforms/InstCombine/2008-05-31-AddBool.ll +++ b/test/Transforms/InstCombine/2008-05-31-AddBool.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {xor} +; RUN: opt < %s -instcombine -S | grep "xor" ; PR2389 define i1 @test(i1 %a, i1 %b) { diff --git a/test/Transforms/InstCombine/2008-05-31-Bools.ll b/test/Transforms/InstCombine/2008-05-31-Bools.ll index a0fe47a625..7c33f2dd05 100644 --- a/test/Transforms/InstCombine/2008-05-31-Bools.ll +++ b/test/Transforms/InstCombine/2008-05-31-Bools.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -instcombine -S > %t -; RUN: grep {xor} %t -; RUN: grep {and} %t -; RUN: not grep {div} %t +; RUN: grep "xor" %t +; RUN: grep "and" %t +; RUN: not grep "div" %t define i1 @foo1(i1 %a, i1 %b) { %A = sub i1 %a, %b diff --git a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll index 917d3ae1f8..ec946238d8 100644 --- a/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll +++ b/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {phi i32} | count 2 +; RUN: opt < %s -instcombine -S | grep "phi i32" | count 2 define void @test() nounwind { entry: diff --git a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll index 08959c9c7c..cc469262d5 100644 --- a/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll +++ 
b/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store i32} | count 2 +; RUN: opt < %s -instcombine -S | grep "store i32" | count 2 @g_139 = global i32 0 ; <i32*> [#uses=2] diff --git a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll index aed1b14ce3..bf5e96b763 100644 --- a/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll +++ b/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store i8} | count 2 +; RUN: opt < %s -instcombine -S | grep "store i8" | count 2 define i32 @a(i8* %s) nounwind { entry: diff --git a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll index c3371c6ae7..80bd83bc6b 100644 --- a/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll +++ b/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp eq i32 %In, 15} +; RUN: opt < %s -instcombine -S | grep "icmp eq i32 %In, 15" ; PR2479 ; (See also PR1800.) 
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll index 4f4709b6f2..9c4c1b538c 100644 --- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll +++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {call.*llvm.stackrestore} +; RUN: opt < %s -instcombine -S | grep "call.*llvm.stackrestore" ; PR2488 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" diff --git a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll index 8245b4d017..cfca72adf8 100644 --- a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll +++ b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp ne i32 \%a} +; RUN: opt < %s -instcombine -S | grep "icmp ne i32 \%a" ; PR2330 define i1 @foo(i32 %a) nounwind { diff --git a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll index 009115966f..a3d44cb246 100644 --- a/test/Transforms/InstCombine/2008-07-08-SubAnd.ll +++ b/test/Transforms/InstCombine/2008-07-08-SubAnd.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep -v {i32 8} +; RUN: opt < %s -instcombine -S | grep -v "i32 8" ; PR2330 define i32 @a(i32 %a) nounwind { diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll index 1ed53237aa..dcf4befa86 100644 --- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll +++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {load volatile} | count 2 +; RUN: opt < %s -instcombine -S | grep "load volatile" | count 2 ; PR2496 target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll index 47a7590076..ed0141403b 100644 --- a/test/Transforms/InstCombine/2008-07-09-SubAndError.ll +++ b/test/Transforms/InstCombine/2008-07-09-SubAndError.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | not grep {sub i32 0} +; RUN: opt < %s -instcombine -S | not grep "sub i32 0" ; PR2330 define i32 @foo(i32 %a) nounwind { diff --git a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll index e911532025..786f0c55bb 100644 --- a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll +++ b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {%C = xor i1 %A, true} -; RUN: opt < %s -instcombine -S | grep {ret i1 false} +; RUN: opt < %s -instcombine -S | grep "%C = xor i1 %A, true" +; RUN: opt < %s -instcombine -S | grep "ret i1 false" ; PR2539 define i1 @test1(i1 %A) { diff --git a/test/Transforms/InstCombine/2008-07-13-DivZero.ll b/test/Transforms/InstCombine/2008-07-13-DivZero.ll index be1f8c2943..18c9954283 100644 --- a/test/Transforms/InstCombine/2008-07-13-DivZero.ll +++ b/test/Transforms/InstCombine/2008-07-13-DivZero.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {lshr.*3} -; RUN: opt < %s -instcombine -S | grep {call .*%cond} +; RUN: opt < %s -instcombine -S | grep "lshr.*3" +; RUN: opt < %s -instcombine -S | grep "call .*%cond" ; PR2506 ; We can simplify the operand of udiv to '8', but not the operand to the diff --git a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll index 501d8a66c3..b469887ba2 100644 --- a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll +++ 
b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | not grep {store } +; RUN: opt < %s -instcombine -S | not grep "store " ; PR2296 @G = common global double 0.000000e+00, align 16 diff --git a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll index 31ea94a5d8..4d00d495a0 100644 --- a/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll +++ b/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {or i1} +; RUN: opt < %s -instcombine -S | grep "or i1" ; PR2844 define i32 @test(i32 %p_74) { diff --git a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll index fd36d86a94..cf29f8d9bf 100644 --- a/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll +++ b/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 false} +; RUN: opt < %s -instcombine -S | grep "ret i1 false" ; PR2697 define i1 @x(i32 %x) nounwind { diff --git a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll index aa077e2ac3..679cc5f73d 100644 --- a/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll +++ b/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 true} +; RUN: opt < %s -instcombine -S | grep "ret i1 true" ; PR2993 define i1 @foo(i32 %x) { diff --git a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll index e4c7ebcefc..75bd5e0175 100644 --- a/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll +++ b/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {i8 2, i8 2} +; RUN: opt < %s -instcombine -S | grep "i8 2, i8 2" ; 
PR2756 define <2 x i8> @foo(<2 x i8> %x) { diff --git a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll index a61a94ecbf..50ea2f42b0 100644 --- a/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll +++ b/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -instcombine -S > %t -; RUN: grep {, align 4} %t | count 3 -; RUN: grep {, align 8} %t | count 3 +; RUN: grep ", align 4" %t | count 3 +; RUN: grep ", align 8" %t | count 3 ; rdar://6480438 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll index ce62f35c10..949fc59220 100644 --- a/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll +++ b/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store.*addrspace(1)} +; RUN: opt < %s -instcombine -S | grep "store.*addrspace(1)" ; PR3335 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll index 142134791e..68c51b43fc 100644 --- a/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll +++ b/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -simplifycfg -instcombine -S | grep 0x7FF8000000000000 | count 12 -; RUN: opt < %s -simplifycfg -instcombine -S | grep {0\\.0} | count 3 -; RUN: opt < %s -simplifycfg -instcombine -S | grep {3\\.5} | count 1 +; RUN: opt < %s -simplifycfg -instcombine -S | grep 
"0\.0" | count 3 +; RUN: opt < %s -simplifycfg -instcombine -S | grep "3\.5" | count 1 ; ; ModuleID = 'apf.c' diff --git a/test/Transforms/InstCombine/2009-01-31-Pressure.ll b/test/Transforms/InstCombine/2009-01-31-Pressure.ll index c3ee9a35ba..666b02e8ed 100644 --- a/test/Transforms/InstCombine/2009-01-31-Pressure.ll +++ b/test/Transforms/InstCombine/2009-01-31-Pressure.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {%B = add i8 %b, %x} +; RUN: opt < %s -instcombine -S | grep "%B = add i8 %b, %x" ; PR2698 declare void @use1(i1) diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll index a51c47d423..9146a8ee01 100644 --- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll +++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -scalarrepl -S | not grep { = alloca} +; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca" ; rdar://6417724 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it. 
diff --git a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll index f56fc388eb..cb8a77c23b 100644 --- a/test/Transforms/InstCombine/2009-02-21-LoadCST.ll +++ b/test/Transforms/InstCombine/2009-02-21-LoadCST.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 3679669} +; RUN: opt < %s -instcombine -S | grep "ret i32 3679669" ; PR3595 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" diff --git a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll index 0a07bf34ba..4d4797720c 100644 --- a/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll +++ b/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ashr i32 %val, 31} +; RUN: opt < %s -instcombine -S | grep "ashr i32 %val, 31" ; PR3851 define i32 @foo2(i32 %val) nounwind { diff --git a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll index 244b22a14d..b79edf66b2 100644 --- a/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll +++ b/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {mul i64} +; RUN: opt < %s -instcombine -S | grep "mul i64" ; rdar://6762288 ; Instcombine should not promote the mul to i96 because it is definitely diff --git a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll index e5355b8d3c..468c1cd8bb 100644 --- a/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll +++ b/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {store i32 0,} +; RUN: opt < %s -instcombine -S | grep "store i32 0," ; PR4366 define void @a() { diff --git 
a/test/Transforms/InstCombine/CPP_min_max.ll b/test/Transforms/InstCombine/CPP_min_max.ll index 531ce2b07b..b3d081b613 100644 --- a/test/Transforms/InstCombine/CPP_min_max.ll +++ b/test/Transforms/InstCombine/CPP_min_max.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep select | not grep {i32\\*} +; RUN: grep select | not grep 'i32\*' ; This testcase corresponds to PR362, which notices that this horrible code ; is generated by the C++ front-end and LLVM optimizers, which has lots of diff --git a/test/Transforms/InstCombine/JavaCompare.ll b/test/Transforms/InstCombine/JavaCompare.ll index 46b6c19f9a..8c1f307c79 100644 --- a/test/Transforms/InstCombine/JavaCompare.ll +++ b/test/Transforms/InstCombine/JavaCompare.ll @@ -1,7 +1,7 @@ ; This is the sequence of stuff that the Java front-end expands for a single ; <= comparison. Check to make sure we turn it into a <= (only) -; RUN: opt < %s -instcombine -S | grep {icmp sle i32 %A, %B} +; RUN: opt < %s -instcombine -S | grep "icmp sle i32 %A, %B" define i1 @le(i32 %A, i32 %B) { %c1 = icmp sgt i32 %A, %B ; <i1> [#uses=1] diff --git a/test/Transforms/InstCombine/add-shrink.ll b/test/Transforms/InstCombine/add-shrink.ll index cc57478663..3edb392ed1 100644 --- a/test/Transforms/InstCombine/add-shrink.ll +++ b/test/Transforms/InstCombine/add-shrink.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {add nsw i32} +; RUN: opt < %s -instcombine -S | grep "add nsw i32" ; RUN: opt < %s -instcombine -S | grep sext | count 1 ; Should only have one sext and the add should be i32 instead of i64. 
diff --git a/test/Transforms/InstCombine/add-sitofp.ll b/test/Transforms/InstCombine/add-sitofp.ll index 98a8cb452a..40edf7114a 100644 --- a/test/Transforms/InstCombine/add-sitofp.ll +++ b/test/Transforms/InstCombine/add-sitofp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {add nsw i32} +; RUN: opt < %s -instcombine -S | grep "add nsw i32" define double @x(i32 %a, i32 %b) nounwind { %m = lshr i32 %a, 24 diff --git a/test/Transforms/InstCombine/addnegneg.ll b/test/Transforms/InstCombine/addnegneg.ll index a3a09f27ed..ad8791d1e7 100644 --- a/test/Transforms/InstCombine/addnegneg.ll +++ b/test/Transforms/InstCombine/addnegneg.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep { sub } | count 1 +; RUN: opt < %s -instcombine -S | grep " sub " | count 1 ; PR2047 define i32 @l(i32 %a, i32 %b, i32 %c, i32 %d) { diff --git a/test/Transforms/InstCombine/adjust-for-sminmax.ll b/test/Transforms/InstCombine/adjust-for-sminmax.ll index b9b6f702eb..1fb7193821 100644 --- a/test/Transforms/InstCombine/adjust-for-sminmax.ll +++ b/test/Transforms/InstCombine/adjust-for-sminmax.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {icmp s\[lg\]t i32 %n, 0} | count 16 +; RUN: opt < %s -instcombine -S | grep "icmp s[lg]t i32 %n, 0" | count 16 ; Instcombine should recognize that this code can be adjusted ; to fit the canonical smax/smin pattern. 
diff --git a/test/Transforms/InstCombine/align-2d-gep.ll b/test/Transforms/InstCombine/align-2d-gep.ll index eeca5c0b1f..5bca46d5a2 100644 --- a/test/Transforms/InstCombine/align-2d-gep.ll +++ b/test/Transforms/InstCombine/align-2d-gep.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {align 16} | count 1 +; RUN: opt < %s -instcombine -S | grep "align 16" | count 1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; A multi-dimensional array in a nested loop doing vector stores that diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll index ef7185cc81..50e03479f6 100644 --- a/test/Transforms/InstCombine/alloca.ll +++ b/test/Transforms/InstCombine/alloca.ll @@ -5,8 +5,11 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 declare void @use(...) -; Zero byte allocas should be deleted. +@int = global i32 zeroinitializer + +; Zero byte allocas should be merged if they can't be deleted. 
; CHECK: @test +; CHECK: alloca ; CHECK-NOT: alloca define void @test() { %X = alloca [0 x i32] ; <[0 x i32]*> [#uses=1] @@ -15,6 +18,9 @@ define void @test() { call void (...)* @use( i32* %Y ) %Z = alloca { } ; <{ }*> [#uses=1] call void (...)* @use( { }* %Z ) + %size = load i32* @int + %A = alloca {{}}, i32 %size + call void (...)* @use( {{}}* %A ) ret void } diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll index 282e88b53d..838c2f73fb 100644 --- a/test/Transforms/InstCombine/and-fcmp.ll +++ b/test/Transforms/InstCombine/and-fcmp.ll @@ -66,3 +66,14 @@ define zeroext i8 @t6(float %x, float %y) nounwind { ; CHECK: t6 ; CHECK: ret i8 0 } + +define zeroext i8 @t7(float %x, float %y) nounwind { + %a = fcmp uno float %x, %y + %b = fcmp ult float %x, %y + %c = and i1 %a, %b + %retval = zext i1 %c to i8 + ret i8 %retval +; CHECK: t7 +; CHECK: fcmp uno +; CHECK-NOT: fcmp ult +} diff --git a/test/Transforms/InstCombine/and-not-or.ll b/test/Transforms/InstCombine/and-not-or.ll index 9dce7b4e6f..a42140be28 100644 --- a/test/Transforms/InstCombine/and-not-or.ll +++ b/test/Transforms/InstCombine/and-not-or.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {and i32 %x, %y} | count 4 -; RUN: opt < %s -instcombine -S | not grep {or} +; RUN: opt < %s -instcombine -S | grep "and i32 %x, %y" | count 4 +; RUN: opt < %s -instcombine -S | not grep "or" define i32 @func1(i32 %x, i32 %y) nounwind { entry: diff --git a/test/Transforms/InstCombine/and-or-and.ll b/test/Transforms/InstCombine/and-or-and.ll index 216cd46775..34cad82f4f 100644 --- a/test/Transforms/InstCombine/and-or-and.ll +++ b/test/Transforms/InstCombine/and-or-and.ll @@ -10,7 +10,7 @@ ; Which corresponds to test1. 
; RUN: opt < %s -instcombine -S | \ -; RUN: not grep {or } +; RUN: not grep "or " define i32 @test1(i32 %X, i32 %Y) { %A = and i32 %X, 7 ; <i32> [#uses=1] diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll index bd878b04a3..cc661d50c8 100644 --- a/test/Transforms/InstCombine/and-or-not.ll +++ b/test/Transforms/InstCombine/and-or-not.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -instcombine -S | grep xor | count 4 ; RUN: opt < %s -instcombine -S | not grep and -; RUN: opt < %s -instcombine -S | not grep { or} +; RUN: opt < %s -instcombine -S | not grep " or" ; PR1510 diff --git a/test/Transforms/InstCombine/and-or.ll b/test/Transforms/InstCombine/and-or.ll index b4224b38b1..0ae12a36c2 100644 --- a/test/Transforms/InstCombine/and-or.ll +++ b/test/Transforms/InstCombine/and-or.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {and i32 %a, 1} | count 4 -; RUN: opt < %s -instcombine -S | grep {or i32 %0, %b} | count 4 +; RUN: opt < %s -instcombine -S | grep "and i32 %a, 1" | count 4 +; RUN: opt < %s -instcombine -S | grep "or i32 %0, %b" | count 4 define i32 @func1(i32 %a, i32 %b) nounwind readnone { diff --git a/test/Transforms/InstCombine/apint-and-or-and.ll b/test/Transforms/InstCombine/apint-and-or-and.ll index 17d29b601e..43536d72e9 100644 --- a/test/Transforms/InstCombine/apint-and-or-and.ll +++ b/test/Transforms/InstCombine/apint-and-or-and.ll @@ -11,7 +11,7 @@ ; ; This tests arbitrary precision integers. -; RUN: opt < %s -instcombine -S | not grep {or } +; RUN: opt < %s -instcombine -S | not grep "or " ; END. define i17 @test1(i17 %X, i17 %Y) { diff --git a/test/Transforms/InstCombine/apint-and1.ll b/test/Transforms/InstCombine/apint-and1.ll index cd4cbb9cf4..fcd2dcd23a 100644 --- a/test/Transforms/InstCombine/apint-and1.ll +++ b/test/Transforms/InstCombine/apint-and1.ll @@ -1,7 +1,7 @@ ; This test makes sure that and instructions are properly eliminated. 
; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0. -; RUN: opt < %s -instcombine -S | not grep {and } +; RUN: opt < %s -instcombine -S | not grep "and " ; END. define i39 @test0(i39 %A) { diff --git a/test/Transforms/InstCombine/apint-and2.ll b/test/Transforms/InstCombine/apint-and2.ll index ae74472b3d..78dc8f990d 100644 --- a/test/Transforms/InstCombine/apint-and2.ll +++ b/test/Transforms/InstCombine/apint-and2.ll @@ -1,7 +1,7 @@ ; This test makes sure that and instructions are properly eliminated. ; This test is for Integer BitWidth > 64 && BitWidth <= 1024. -; RUN: opt < %s -instcombine -S | not grep {and } +; RUN: opt < %s -instcombine -S | not grep "and " ; END. diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll index 1a3340ac56..818ae6659b 100644 --- a/test/Transforms/InstCombine/apint-shift-simplify.ll +++ b/test/Transforms/InstCombine/apint-shift-simplify.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: egrep {shl|lshr|ashr} | count 3 +; RUN: egrep "shl|lshr|ashr" | count 3 define i41 @test0(i41 %A, i41 %B, i41 %C) { %X = shl i41 %A, %C diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll index 8b9ff143ea..df8ec52b5a 100644 --- a/test/Transforms/InstCombine/apint-sub.ll +++ b/test/Transforms/InstCombine/apint-sub.ll @@ -3,7 +3,7 @@ ; ; RUN: opt < %s -instcombine -S | \ -; RUN: grep -v {sub i19 %Cok, %Bok} | grep -v {sub i25 0, %Aok} | not grep sub +; RUN: grep -v "sub i19 %Cok, %Bok" | grep -v "sub i25 0, %Aok" | not grep sub ; END. define i23 @test1(i23 %A) { diff --git a/test/Transforms/InstCombine/apint-xor1.ll b/test/Transforms/InstCombine/apint-xor1.ll index 849c659833..01cbcf158c 100644 --- a/test/Transforms/InstCombine/apint-xor1.ll +++ b/test/Transforms/InstCombine/apint-xor1.ll @@ -1,7 +1,7 @@ ; This test makes sure that xor instructions are properly eliminated. 
; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0. -; RUN: opt < %s -instcombine -S | not grep {xor } +; RUN: opt < %s -instcombine -S | not grep "xor " define i47 @test1(i47 %A, i47 %B) { diff --git a/test/Transforms/InstCombine/apint-xor2.ll b/test/Transforms/InstCombine/apint-xor2.ll index cacc17958e..ab93c92381 100644 --- a/test/Transforms/InstCombine/apint-xor2.ll +++ b/test/Transforms/InstCombine/apint-xor2.ll @@ -1,7 +1,7 @@ ; This test makes sure that xor instructions are properly eliminated. ; This test is for Integer BitWidth > 64 && BitWidth <= 1024. -; RUN: opt < %s -instcombine -S | not grep {xor } +; RUN: opt < %s -instcombine -S | not grep "xor " ; END. diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll index 79a096ff0f..62c9ddc8c3 100644 --- a/test/Transforms/InstCombine/bit-checks.ll +++ b/test/Transforms/InstCombine/bit-checks.ll @@ -1,7 +1,7 @@ ; This test makes sure that these instructions are properly eliminated. ; ; RUN: opt < %s -instcombine -S | \ -; RUN: not grep {tobool} +; RUN: not grep "tobool" ; END. 
define i32 @main(i32 %argc, i8** %argv) nounwind ssp { entry: diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll index a6fd83742c..318ca7339f 100644 --- a/test/Transforms/InstCombine/bitcount.ll +++ b/test/Transforms/InstCombine/bitcount.ll @@ -1,5 +1,5 @@ ; Tests to make sure bit counts of constants are folded -; RUN: opt < %s -instcombine -S | grep {ret i32 19} +; RUN: opt < %s -instcombine -S | grep "ret i32 19" ; RUN: opt < %s -instcombine -S | \ ; RUN: grep -v declare | not grep llvm.ct diff --git a/test/Transforms/InstCombine/bittest.ll b/test/Transforms/InstCombine/bittest.ll index 92863d5947..84ee7dd602 100644 --- a/test/Transforms/InstCombine/bittest.ll +++ b/test/Transforms/InstCombine/bittest.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -simplifycfg -S |\ -; RUN: not grep {call void @abort} +; RUN: not grep "call void @abort" @b_rec.0 = external global i32 ; <i32*> [#uses=2] diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll index 168b3e8333..ba7df3125f 100644 --- a/test/Transforms/InstCombine/bswap.ll +++ b/test/Transforms/InstCombine/bswap.ll @@ -1,7 +1,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {call.*llvm.bswap} | count 6 +; RUN: grep "call.*llvm.bswap" | count 6 define i32 @test1(i32 %i) { %tmp1 = lshr i32 %i, 24 ; <i32> [#uses=1] diff --git a/test/Transforms/InstCombine/dce-iterate.ll b/test/Transforms/InstCombine/dce-iterate.ll index 1d2cc53210..1dd4522541 100644 --- a/test/Transforms/InstCombine/dce-iterate.ll +++ b/test/Transforms/InstCombine/dce-iterate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret double .sy} +; RUN: opt < %s -instcombine -S | grep "ret double .sy" define internal double @ScaleObjectAdd(double %sx, double %sy, double %sz) nounwind { entry: diff --git 
a/test/Transforms/InstCombine/deadcode.ll b/test/Transforms/InstCombine/deadcode.ll index 7c7f1abc0c..8fe673d8c9 100644 --- a/test/Transforms/InstCombine/deadcode.ll +++ b/test/Transforms/InstCombine/deadcode.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 %A} +; RUN: opt < %s -instcombine -S | grep "ret i32 %A" ; RUN: opt < %s -die -S | not grep call.*llvm define i32 @test(i32 %A) { diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll new file mode 100644 index 0000000000..a07f3ea949 --- /dev/null +++ b/test/Transforms/InstCombine/div-shift.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @t1(i16 zeroext %x, i32 %y) nounwind { +entry: +; CHECK: t1 +; CHECK-NOT: sdiv +; CHECK: lshr i32 %conv + %conv = zext i16 %x to i32 + %s = shl i32 2, %y + %d = sdiv i32 %conv, %s + ret i32 %d +} + +; rdar://11721329 +define i64 @t2(i64 %x, i32 %y) nounwind { +; CHECK: t2 +; CHECK-NOT: udiv +; CHECK: lshr i64 %x + %1 = shl i32 1, %y + %2 = zext i32 %1 to i64 + %3 = udiv i64 %x, %2 + ret i64 %3 +} diff --git a/test/Transforms/InstCombine/enforce-known-alignment.ll b/test/Transforms/InstCombine/enforce-known-alignment.ll index 9e9be7f565..6645d99035 100644 --- a/test/Transforms/InstCombine/enforce-known-alignment.ll +++ b/test/Transforms/InstCombine/enforce-known-alignment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep alloca | grep {align 16} +; RUN: opt < %s -instcombine -S | grep alloca | grep "align 16" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9.6" diff --git a/test/Transforms/InstCombine/fp-ret-bitcast.ll b/test/Transforms/InstCombine/fp-ret-bitcast.ll index 35ece42661..b2fbc0b882 100644 --- a/test/Transforms/InstCombine/fp-ret-bitcast.ll +++ b/test/Transforms/InstCombine/fp-ret-bitcast.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S 
| \ -; RUN: grep {call float bitcast} | count 1 +; RUN: grep "call float bitcast" | count 1 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.NSObject = type { %struct.objc_class* } %struct.NSArray = type { %struct.NSObject } diff --git a/test/Transforms/InstCombine/invoke.ll b/test/Transforms/InstCombine/invoke.ll new file mode 100644 index 0000000000..ea3564dae0 --- /dev/null +++ b/test/Transforms/InstCombine/invoke.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_call_unexpected(i8*) +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly + + +; CHECK: @f1 +define i64 @f1() nounwind uwtable ssp { +entry: +; CHECK: nvoke noalias i8* undef() + %call = invoke noalias i8* undef() + to label %invoke.cont unwind label %lpad + +invoke.cont: +; CHECK: ret i64 0 + %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false) + ret i64 %0 + +lpad: + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %2 = extractvalue { i8*, i32 } %1, 0 + tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind + unreachable +} + +; CHECK: @f2 +define i64 @f2() nounwind uwtable ssp { +entry: +; CHECK: nvoke noalias i8* null() + %call = invoke noalias i8* null() + to label %invoke.cont unwind label %lpad + +invoke.cont: +; CHECK: ret i64 0 + %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false) + ret i64 %0 + +lpad: + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %2 = extractvalue { i8*, i32 } %1, 0 + tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind + unreachable 
+} diff --git a/test/Transforms/InstCombine/known_align.ll b/test/Transforms/InstCombine/known_align.ll index 5382abf821..0249951966 100644 --- a/test/Transforms/InstCombine/known_align.ll +++ b/test/Transforms/InstCombine/known_align.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {align 1} +; RUN: opt < %s -instcombine -S | grep "align 1" ; END. %struct.p = type <{ i8, i32 }> diff --git a/test/Transforms/InstCombine/loadstore-alignment.ll b/test/Transforms/InstCombine/loadstore-alignment.ll index 1d932d27f7..2263cb20ec 100644 --- a/test/Transforms/InstCombine/loadstore-alignment.ll +++ b/test/Transforms/InstCombine/loadstore-alignment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {, align 16} | count 14 +; RUN: opt < %s -instcombine -S | grep ", align 16" | count 14 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @x = external global <2 x i64>, align 16 diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll index 04aac98145..bcc9e188b9 100644 --- a/test/Transforms/InstCombine/memcpy-to-load.ll +++ b/test/Transforms/InstCombine/memcpy-to-load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {load double} +; RUN: opt < %s -instcombine -S | grep "load double" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll index 4602c12680..9d51ea0e70 100644 --- a/test/Transforms/InstCombine/memmove.ll +++ b/test/Transforms/InstCombine/memmove.ll @@ -1,6 +1,6 @@ ; This test makes sure that memmove instructions are properly eliminated. 
; -; RUN: opt < %s -instcombine -S | not grep {call void @llvm.memmove} +; RUN: opt < %s -instcombine -S | not grep "call void @llvm.memmove" @S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1] @h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1] diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll index 7f7bc9f86e..7f02dad58a 100644 --- a/test/Transforms/InstCombine/memset.ll +++ b/test/Transforms/InstCombine/memset.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset} +; RUN: opt < %s -instcombine -S | not grep "call.*llvm.memset" define i32 @main() { %target = alloca [1024 x i8] diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll index edb530585c..6c8e634763 100644 --- a/test/Transforms/InstCombine/mul.ll +++ b/test/Transforms/InstCombine/mul.ll @@ -138,8 +138,9 @@ define i32 @test16(i32 %b, i1 %c) { ; e = b & (a >> 31) %e = mul i32 %d, %b ; <i32> [#uses=1] ret i32 %e -; CHECK: [[TEST16:%.*]] = sext i1 %c to i32 -; CHECK-NEXT: %e = and i32 [[TEST16]], %b +; CHECK: [[TEST16:%.*]] = zext i1 %c to i32 +; CHECK-NEXT: %1 = sub i32 0, [[TEST16]] +; CHECK-NEXT: %e = and i32 %1, %b ; CHECK-NEXT: ret i32 %e } diff --git a/test/Transforms/InstCombine/multi-use-or.ll b/test/Transforms/InstCombine/multi-use-or.ll index 8c6a0e0bbc..8b90e0d7f6 100644 --- a/test/Transforms/InstCombine/multi-use-or.ll +++ b/test/Transforms/InstCombine/multi-use-or.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {fadd double .sx, .sy} +; RUN: opt < %s -instcombine -S | grep "fadd double .sx, .sy" ; The 'or' has multiple uses, make sure that this doesn't prevent instcombine ; from propagating the extends to the truncs. 
diff --git a/test/Transforms/InstCombine/narrow.ll b/test/Transforms/InstCombine/narrow.ll index 1b96a06eeb..5dd13a0966 100644 --- a/test/Transforms/InstCombine/narrow.ll +++ b/test/Transforms/InstCombine/narrow.ll @@ -1,7 +1,7 @@ ; This file contains various testcases that check to see that instcombine ; is narrowing computations when possible. ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {ret i1 false} +; RUN: grep "ret i1 false" ; test1 - Eliminating the casts in this testcase (by narrowing the AND ; operation) allows instcombine to realize the function always returns false. diff --git a/test/Transforms/InstCombine/objsize-64.ll b/test/Transforms/InstCombine/objsize-64.ll new file mode 100644 index 0000000000..4993063d1c --- /dev/null +++ b/test/Transforms/InstCombine/objsize-64.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare noalias i8* @malloc(i32) nounwind +declare noalias i8* @_Znwm(i64) ; new(unsigned long) +declare i32 @__gxx_personality_v0(...) 
+declare void @__cxa_call_unexpected(i8*) +declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly + +; CHECK: @f1 +define i64 @f1() { + %call = call i8* @malloc(i32 4) + %size = call i64 @llvm.objectsize.i64(i8* %call, i1 false) +; CHECK-NEXT: ret i64 4 + ret i64 %size +} + + +; CHECK: @f2 +define i64 @f2() nounwind uwtable ssp { +entry: +; CHECK: invoke void @llvm.donothing() + %call = invoke noalias i8* @_Znwm(i64 13) + to label %invoke.cont unwind label %lpad + +invoke.cont: +; CHECK: ret i64 13 + %0 = tail call i64 @llvm.objectsize.i64(i8* %call, i1 false) + ret i64 %0 + +lpad: + %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %2 = extractvalue { i8*, i32 } %1, 0 + tail call void @__cxa_call_unexpected(i8* %2) noreturn nounwind + unreachable +} diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll index 9bf215c952..2d2d6c286d 100644 --- a/test/Transforms/InstCombine/objsize.ll +++ b/test/Transforms/InstCombine/objsize.ll @@ -42,7 +42,7 @@ define i32 @f() nounwind { define i1 @baz() nounwind { ; CHECK: @baz -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: objectsize %1 = tail call i32 @llvm.objectsize.i32(i8* getelementptr inbounds ([0 x i8]* @window, i32 0, i32 0), i1 false) %2 = icmp eq i32 %1, -1 ret i1 %2 diff --git a/test/Transforms/InstCombine/odr-linkage.ll b/test/Transforms/InstCombine/odr-linkage.ll index 61365b4848..2ce62468dc 100644 --- a/test/Transforms/InstCombine/odr-linkage.ll +++ b/test/Transforms/InstCombine/odr-linkage.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 10} +; RUN: opt < %s -instcombine -S | grep "ret i32 10" @g1 = available_externally constant i32 1 @g2 = linkonce_odr constant i32 2 diff --git a/test/Transforms/InstCombine/or-to-xor.ll b/test/Transforms/InstCombine/or-to-xor.ll index 1495ee4970..8847cb7328 100644 --- a/test/Transforms/InstCombine/or-to-xor.ll +++ 
b/test/Transforms/InstCombine/or-to-xor.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {xor i32 %a, %b} | count 4 -; RUN: opt < %s -instcombine -S | not grep {and} +; RUN: opt < %s -instcombine -S | grep "xor i32 %a, %b" | count 4 +; RUN: opt < %s -instcombine -S | not grep "and" define i32 @func1(i32 %a, i32 %b) nounwind readnone { entry: diff --git a/test/Transforms/InstCombine/phi-merge-gep.ll b/test/Transforms/InstCombine/phi-merge-gep.ll index 2671749709..25c9cea9b7 100644 --- a/test/Transforms/InstCombine/phi-merge-gep.ll +++ b/test/Transforms/InstCombine/phi-merge-gep.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -S -instcombine > %t -; RUN: grep {= getelementptr} %t | count 20 -; RUN: grep {= phi} %t | count 13 +; RUN: grep "= getelementptr" %t | count 20 +; RUN: grep "= phi" %t | count 13 ; Don't push the geps through these phis, because they would require ; two phis each, which burdens the loop with high register pressure. diff --git a/test/Transforms/InstCombine/pr2645-0.ll b/test/Transforms/InstCombine/pr2645-0.ll index 9bcaa43a80..e8aeb2afb9 100644 --- a/test/Transforms/InstCombine/pr2645-0.ll +++ b/test/Transforms/InstCombine/pr2645-0.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {insertelement <4 x float> undef} +; RUN: opt < %s -instcombine -S | grep "insertelement <4 x float> undef" ; Instcombine should be able to prove that none of the ; insertelement's first operand's elements are needed. 
diff --git a/test/Transforms/InstCombine/sdiv-shift.ll b/test/Transforms/InstCombine/sdiv-shift.ll deleted file mode 100644 index f4d2b36cbb..0000000000 --- a/test/Transforms/InstCombine/sdiv-shift.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: opt < %s -instcombine -S | not grep div - -define i32 @a(i16 zeroext %x, i32 %y) nounwind { -entry: - %conv = zext i16 %x to i32 - %s = shl i32 2, %y - %d = sdiv i32 %conv, %s - ret i32 %d -} diff --git a/test/Transforms/InstCombine/select-load-call.ll b/test/Transforms/InstCombine/select-load-call.ll index bef0cf841b..b63468de53 100644 --- a/test/Transforms/InstCombine/select-load-call.ll +++ b/test/Transforms/InstCombine/select-load-call.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i32 1} +; RUN: opt < %s -instcombine -S | grep "ret i32 1" declare void @test2() diff --git a/test/Transforms/InstCombine/setcc-strength-reduce.ll b/test/Transforms/InstCombine/setcc-strength-reduce.ll index 62ab116367..138712e5a9 100644 --- a/test/Transforms/InstCombine/setcc-strength-reduce.ll +++ b/test/Transforms/InstCombine/setcc-strength-reduce.ll @@ -3,7 +3,7 @@ ; into equivalent setne,eq instructions. ; ; RUN: opt < %s -instcombine -S | \ -; RUN: grep -v {icmp eq} | grep -v {icmp ne} | not grep icmp +; RUN: grep -v "icmp eq" | grep -v "icmp ne" | not grep icmp ; END. 
define i1 @test1(i32 %A) { diff --git a/test/Transforms/InstCombine/shufflemask-undef.ll b/test/Transforms/InstCombine/shufflemask-undef.ll index cf87aef7df..aa6baa989b 100644 --- a/test/Transforms/InstCombine/shufflemask-undef.ll +++ b/test/Transforms/InstCombine/shufflemask-undef.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | not grep {shufflevector.\*i32 8} +; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" diff --git a/test/Transforms/InstCombine/shufflevec-constant.ll b/test/Transforms/InstCombine/shufflevec-constant.ll index 29ae5a7982..a002b2a853 100644 --- a/test/Transforms/InstCombine/shufflevec-constant.ll +++ b/test/Transforms/InstCombine/shufflevec-constant.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>} +; RUN: opt < %s -instcombine -S | grep "ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll index 9a08c6446c..ab0e7e7ff9 100644 --- a/test/Transforms/InstCombine/signed-comparison.ll +++ b/test/Transforms/InstCombine/signed-comparison.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -instcombine -S > %t ; RUN: not grep zext %t ; RUN: not grep slt %t -; RUN: grep {icmp ult} %t +; RUN: grep "icmp ult" %t ; Instcombine should convert the zext+slt into a simple ult. 
diff --git a/test/Transforms/InstCombine/srem-simplify-bug.ll b/test/Transforms/InstCombine/srem-simplify-bug.ll index af824a445d..3458714b8d 100644 --- a/test/Transforms/InstCombine/srem-simplify-bug.ll +++ b/test/Transforms/InstCombine/srem-simplify-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i1 false} +; RUN: opt < %s -instcombine -S | grep "ret i1 false" ; PR2276 define i1 @f(i32 %x) { diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll index 2fc84140b3..80c2ee88f3 100644 --- a/test/Transforms/InstCombine/stack-overalign.ll +++ b/test/Transforms/InstCombine/stack-overalign.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {align 32} | count 1 +; RUN: opt < %s -instcombine -S | grep "align 32" | count 1 ; It's tempting to have an instcombine in which the src pointer of a ; memcpy is aligned up to the alignment of the destination, however diff --git a/test/Transforms/InstCombine/stacksaverestore.ll b/test/Transforms/InstCombine/stacksaverestore.ll index 0fcaefac62..f5c7a6f9f3 100644 --- a/test/Transforms/InstCombine/stacksaverestore.ll +++ b/test/Transforms/InstCombine/stacksaverestore.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {call.*stackrestore} | count 1 +; RUN: opt < %s -instcombine -S | grep "call.*stackrestore" | count 1 declare i8* @llvm.stacksave() declare void @llvm.stackrestore(i8*) diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll index 6ec342a4f5..cbbad7f797 100644 --- a/test/Transforms/InstCombine/trunc.ll +++ b/test/Transforms/InstCombine/trunc.ll @@ -12,8 +12,8 @@ define i64 @test1(i64 %a) { call void @use(i32 %b) ret i64 %d ; CHECK: @test1 -; CHECK: %d = and i64 %a, 15 -; CHECK: ret i64 %d +; CHECK-NOT: ext +; CHECK: ret } define i64 @test2(i64 %a) { %b = trunc i64 %a to i32 @@ -34,8 +34,8 @@ define i64 @test3(i64 %a) { call void @use(i32 %b) ret i64 %d ; CHECK: @test3 -; CHECK: %d = and i64 %a, 8 -; 
CHECK: ret i64 %d +; CHECK-NOT: ext +; CHECK: ret } define i64 @test4(i64 %a) { %b = trunc i64 %a to i32 @@ -46,8 +46,9 @@ define i64 @test4(i64 %a) { ret i64 %d ; CHECK: @test4 ; CHECK: = and i64 %a, 8 -; CHECK: %d = xor i64 {{.*}}, 8 -; CHECK: ret i64 %d +; CHECK: = xor i64 {{.*}}, 8 +; CHECK-NOT: ext +; CHECK: ret } define i32 @test5(i32 %A) { diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll index bfdd98cddf..064e721768 100644 --- a/test/Transforms/InstCombine/udiv-simplify-bug-0.ll +++ b/test/Transforms/InstCombine/udiv-simplify-bug-0.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret i64 0} | count 2 +; RUN: opt < %s -instcombine -S | grep "ret i64 0" | count 2 define i64 @foo(i32 %x) nounwind { %y = lshr i32 %x, 1 diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll index 229f1a85e8..3e94ab5b2c 100644 --- a/test/Transforms/InstCombine/urem-simplify-bug.ll +++ b/test/Transforms/InstCombine/urem-simplify-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5} +; RUN: opt < %s -instcombine -S | grep "= or i32 %x, -5" @.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1] @.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1] diff --git a/test/Transforms/InstCombine/vec_insertelt.ll b/test/Transforms/InstCombine/vec_insertelt.ll index eedf882518..e35fa5e551 100644 --- a/test/Transforms/InstCombine/vec_insertelt.ll +++ b/test/Transforms/InstCombine/vec_insertelt.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {ret <4 x i32> %A} +; RUN: opt < %s -instcombine -S | grep "ret <4 x i32> %A" ; PR1286 define <4 x i32> @test1(<4 x i32> %A) { diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll index 2be4359923..b4c41f6d29 100644 --- a/test/Transforms/InstCombine/vec_narrow.ll +++ 
b/test/Transforms/InstCombine/vec_narrow.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {fadd float} +; RUN: opt < %s -instcombine -S | grep "fadd float" define float @test(<4 x float> %A, <4 x float> %B, float %f) { diff --git a/test/Transforms/InstCombine/vector-srem.ll b/test/Transforms/InstCombine/vector-srem.ll index acb11c52ad..b1ed49eefa 100644 --- a/test/Transforms/InstCombine/vector-srem.ll +++ b/test/Transforms/InstCombine/vector-srem.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {srem <4 x i32>} +; RUN: opt < %s -instcombine -S | grep "srem <4 x i32>" define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) { diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll index 2256678118..7cab199a58 100644 --- a/test/Transforms/InstCombine/volatile_store.ll +++ b/test/Transforms/InstCombine/volatile_store.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -instcombine -S | grep {store volatile} -; RUN: opt < %s -instcombine -S | grep {load volatile} +; RUN: opt < %s -instcombine -S | grep "store volatile" +; RUN: opt < %s -instcombine -S | grep "load volatile" @x = weak global i32 0 ; <i32*> [#uses=2] diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll index a7bcdac08b..3722697f98 100644 --- a/test/Transforms/InstCombine/xor.ll +++ b/test/Transforms/InstCombine/xor.ll @@ -1,7 +1,7 @@ ; This test makes sure that these instructions are properly eliminated. ; ; RUN: opt < %s -instcombine -S | \ -; RUN: not grep {xor } +; RUN: not grep "xor " ; END. 
@G1 = global i32 0 ; <i32*> [#uses=1] @G2 = global i32 0 ; <i32*> [#uses=1] diff --git a/test/Transforms/InstCombine/zeroext-and-reduce.ll b/test/Transforms/InstCombine/zeroext-and-reduce.ll index 592b8a172f..315033dd88 100644 --- a/test/Transforms/InstCombine/zeroext-and-reduce.ll +++ b/test/Transforms/InstCombine/zeroext-and-reduce.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instcombine -S | \ -; RUN: grep {and i32 %Y, 8} +; RUN: grep "and i32 %Y, 8" define i32 @test1(i8 %X) { %Y = zext i8 %X to i32 ; <i32> [#uses=1] diff --git a/test/Transforms/InstCombine/zext-bool-add-sub.ll b/test/Transforms/InstCombine/zext-bool-add-sub.ll index 11642733ac..78bcedbbc2 100644 --- a/test/Transforms/InstCombine/zext-bool-add-sub.ll +++ b/test/Transforms/InstCombine/zext-bool-add-sub.ll @@ -1,29 +1,16 @@ -; RUN: opt < %s -instcombine -S | not grep zext +; RUN: opt < %s -instcombine -S | FileCheck %s +; rdar://11748024 -define i32 @a(i1 %x) { +define i32 @a(i1 zeroext %x, i1 zeroext %y) { entry: - %y = zext i1 %x to i32 - %res = add i32 %y, 1 - ret i32 %res -} - -define i32 @b(i1 %x) { -entry: - %y = zext i1 %x to i32 - %res = add i32 %y, -1 - ret i32 %res -} - -define i32 @c(i1 %x) { -entry: - %y = zext i1 %x to i32 - %res = sub i32 0, %y - ret i32 %res -} - -define i32 @d(i1 %x) { -entry: - %y = zext i1 %x to i32 - %res = sub i32 3, %y - ret i32 %res +; CHECK: @a +; CHECK: [[TMP1:%.*]] = zext i1 %y to i32 +; CHECK: [[TMP2:%.*]] = select i1 %x, i32 2, i32 1 +; CHECK-NEXT: sub i32 [[TMP2]], [[TMP1]] + %conv = zext i1 %x to i32 + %conv3 = zext i1 %y to i32 + %conv3.neg = sub i32 0, %conv3 + %sub = add i32 %conv, 1 + %add = add i32 %sub, %conv3.neg + ret i32 %add } diff --git a/test/Transforms/InstCombine/zext-fold.ll b/test/Transforms/InstCombine/zext-fold.ll index 9521101e73..e5f316b811 100644 --- a/test/Transforms/InstCombine/zext-fold.ll +++ b/test/Transforms/InstCombine/zext-fold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -S | grep {zext } | count 1 +; RUN: opt < %s -instcombine 
-S | grep "zext " | count 1 ; PR1570 define i32 @test2(float %X, float %Y) { diff --git a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll index b5d1065e67..6a50d4f1a3 100644 --- a/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll +++ b/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -jump-threading -S | grep {ret i32 0} +; RUN: opt < %s -jump-threading -S | grep "ret i32 0" ; PR3138 define i32 @jt() { diff --git a/test/Transforms/JumpThreading/compare.ll b/test/Transforms/JumpThreading/compare.ll index 581785c45f..9b05b4496b 100644 --- a/test/Transforms/JumpThreading/compare.ll +++ b/test/Transforms/JumpThreading/compare.ll @@ -1,5 +1,5 @@ ; There should be no phi nodes left. -; RUN: opt < %s -jump-threading -S | not grep {phi i32} +; RUN: opt < %s -jump-threading -S | not grep "phi i32" declare i32 @f1() declare i32 @f2() diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll index a4914f9634..c6e9faa171 100644 --- a/test/Transforms/JumpThreading/no-irreducible-loops.ll +++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -jump-threading -loop-rotate -instcombine -indvars -loop-unroll -simplifycfg -S -verify-dom-info -verify-loop-info > %t -; RUN: grep {store volatile} %t | count 3 -; RUN: not grep {br label} %t +; RUN: grep "store volatile" %t | count 3 +; RUN: not grep "br label" %t ; Jump threading should not prevent this loop from being unrolled. 
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll index 153458579b..dd43c88379 100644 --- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll +++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-simplify -lcssa -S | \ -; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry} +; RUN: grep "%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry" %struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* } diff --git a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll index ad4f1447b2..575f8163c9 100644 --- a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll +++ b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -lcssa -S | \ -; RUN: grep {%X.1.lcssa} +; RUN: grep "%X.1.lcssa" ; RUN: opt < %s -lcssa -S | \ -; RUN: not grep {%X.1.lcssa1} +; RUN: not grep "%X.1.lcssa1" declare i1 @c1() diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll index 23ab2c0ce0..4b05ad9953 100644 --- a/test/Transforms/LCSSA/basictest.ll +++ b/test/Transforms/LCSSA/basictest.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -lcssa -S | \ -; RUN: grep {X3.lcssa = phi i32} +; RUN: grep "X3.lcssa = phi i32" ; RUN: opt < %s -lcssa -S | \ -; RUN: grep {X4 = add i32 3, %X3.lcssa} +; RUN: grep "X4 = add i32 3, %X3.lcssa" define void @lcssa(i1 %S2) { entry: diff --git a/test/Transforms/LCSSA/unreachable-use.ll b/test/Transforms/LCSSA/unreachable-use.ll index c389c9cfa8..71ae134e31 100644 --- a/test/Transforms/LCSSA/unreachable-use.ll +++ b/test/Transforms/LCSSA/unreachable-use.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -lcssa -S -verify-loop-info | grep {\[%\]tmp33 = load i1\\*\\* \[%\]tmp} +; RUN: opt < %s -lcssa -S -verify-loop-info | grep "[%]tmp33 = load i1\*\* [%]tmp" ; PR6546 ; LCSSA doesn't need to transform uses in blocks not reachable diff --git 
a/test/Transforms/LCSSA/unused-phis.ll b/test/Transforms/LCSSA/unused-phis.ll index aa2ab96341..01b214b8e3 100644 --- a/test/Transforms/LCSSA/unused-phis.ll +++ b/test/Transforms/LCSSA/unused-phis.ll @@ -2,9 +2,9 @@ ; CHECK: exit1: ; CHECK: .lcssa = ; CHECK: exit2: -; CHECK: .lcssa2 = +; CHECK: .lcssa1 = ; CHECK: exit3: -; CHECK-NOT: .lcssa1 = +; CHECK-NOT: .lcssa ; Test to ensure that when there are multiple exit blocks, PHI nodes are ; only inserted by LCSSA when there is a use dominated by a given exit diff --git a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll index 70a04c73b1..b54d520a91 100644 --- a/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll +++ b/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll @@ -4,7 +4,7 @@ ; case... bad. ; RUN: opt < %s -licm -loop-deletion -simplifycfg -S | \ -; RUN: not grep {br } +; RUN: not grep "br " define i32 @main(i32 %argc) { ; <label>:0 diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll index 4df6ea7581..94511cc9c1 100644 --- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll +++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -licm -S | grep {store volatile} +; RUN: opt < %s -licm -S | grep "store volatile" ; PR1435 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll index 4e100d3aee..f9fc551df3 100644 --- a/test/Transforms/LICM/hoist-invariant-load.ll +++ b/test/Transforms/LICM/hoist-invariant-load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -licm -stats -S |& grep "1 licm" +; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm" @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1 
@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll index 9cb55b4119..0666e0f06a 100644 --- a/test/Transforms/LoopRotate/PhiRename-1.ll +++ b/test/Transforms/LoopRotate/PhiRename-1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | not grep {\\\[ .tmp224} +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | not grep "[ .tmp224" ; END. target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/Transforms/LoopSimplify/indirectbr.ll b/test/Transforms/LoopSimplify/indirectbr.ll index 9814d4ad93..ca05f437e5 100644 --- a/test/Transforms/LoopSimplify/indirectbr.ll +++ b/test/Transforms/LoopSimplify/indirectbr.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S \ -; RUN: | grep -F {indirectbr i8* %x, \[label %L0, label %L1\]} \ +; RUN: | grep -F "indirectbr i8* %x, [label %L0, label %L1]" \ ; RUN: | count 6 ; LoopSimplify should not try to transform loops when indirectbr is involved. 
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll index 40ad2f44e7..8de5938939 100644 --- a/test/Transforms/LoopSimplify/merge-exits.ll +++ b/test/Transforms/LoopSimplify/merge-exits.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t ; RUN: not grep sext %t -; RUN: grep {phi i64} %t | count 1 +; RUN: grep "phi i64" %t | count 1 ; Loopsimplify should be able to merge the two loop exits ; into one, so that loop rotate can rotate the loop, so diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll index 23ac7f257a..854c612f02 100644 --- a/test/Transforms/LoopSimplify/preserve-scev.ll +++ b/test/Transforms/LoopSimplify/preserve-scev.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>} +; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep "%cmp = icmp slt i32" | grep "= {%\.ph,+,1}<%for.cond>" ; PR8079 ; LoopSimplify should invalidate indvars when splitting out the diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll new file mode 100644 index 0000000000..70ead330c1 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a8 | FileCheck %s +; +; LSR should only check for valid address modes when the IV user is a +; memory address. +; svn r158536, rdar://11635990 +; +; Note that we still don't produce the best code here because we fail +; to coalesce the IV. See <rdar://problem/11680670> [coalescer] IVs +; need to be scheduled to expose coalescing. 
+ +; LSR before the fix: +;The chosen solution requires 4 regs, with addrec cost 1, plus 3 base adds, plus 2 setup cost: +; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32 +; reg(%v3) + reg({0,+,-1}<%while.cond.i.i>) + imm(1) +; LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32 +; reg(%v3) + reg({0,+,-1}<%while.cond.i.i>) +; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32* +; reg((-4 + (4 * %v3) + %v1)) + 4*reg({0,+,-1}<%while.cond.i.i>) +; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32* +; reg((-4 + (4 * %v3) + %v4)) + 4*reg({0,+,-1}<%while.cond.i.i>) +; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32 +; reg(%v3) +; +; LSR after the fix: +;The chosen solution requires 4 regs, with addrec cost 1, plus 1 base add, plus 2 setup cost: +; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32 +; reg({%v3,+,-1}<nsw><%while.cond.i.i>) + imm(1) +; LSR Use: Kind=ICmpZero, Offsets={0}, widest fixup type: i32 +; reg({%v3,+,-1}<nsw><%while.cond.i.i>) +; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32* +; reg((-4 + %v1)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>) +; LSR Use: Kind=Address of i32, Offsets={0}, widest fixup type: i32* +; reg((-4 + %v4)) + 4*reg({%v3,+,-1}<nsw><%while.cond.i.i>) +; LSR Use: Kind=Special, Offsets={0}, all-fixups-outside-loop, widest fixup type: i32 +; reg(%v3) + + +%s = type { i32* } + +@ncol = external global i32, align 4 + +declare i32* @getptr() nounwind +declare %s* @getstruct() nounwind + +; CHECK: @main +; Check that the loop preheader contains no address computation. 
+; CHECK: %entry +; CHECK-NOT: add{{.*}}lsl +; CHECK: ldr{{.*}}lsl #2 +; CHECK: ldr{{.*}}lsl #2 +define i32 @main() nounwind ssp { +entry: + %v0 = load i32* @ncol, align 4, !tbaa !0 + %v1 = tail call i32* @getptr() nounwind + %cmp10.i = icmp eq i32 %v0, 0 + br label %while.cond.outer + +while.cond.outer: + %call18 = tail call %s* @getstruct() nounwind + br label %while.cond + +while.cond: + %cmp20 = icmp eq i32* %v1, null + br label %while.body + +while.body: + %v3 = load i32* @ncol, align 4, !tbaa !0 + br label %while.cond.i + +while.cond.i: + %state.i = getelementptr inbounds %s* %call18, i32 0, i32 0 + %v4 = load i32** %state.i, align 4, !tbaa !3 + br label %while.cond.i.i + +while.cond.i.i: + %counter.0.i.i = phi i32 [ %v3, %while.cond.i ], [ %dec.i.i, %land.rhs.i.i ] + %dec.i.i = add nsw i32 %counter.0.i.i, -1 + %tobool.i.i = icmp eq i32 %counter.0.i.i, 0 + br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i + +land.rhs.i.i: + %arrayidx.i.i = getelementptr inbounds i32* %v4, i32 %dec.i.i + %v5 = load i32* %arrayidx.i.i, align 4, !tbaa !0 + %arrayidx1.i.i = getelementptr inbounds i32* %v1, i32 %dec.i.i + %v6 = load i32* %arrayidx1.i.i, align 4, !tbaa !0 + %cmp.i.i = icmp eq i32 %v5, %v6 + br i1 %cmp.i.i, label %while.cond.i.i, label %equal_data.exit.i + +equal_data.exit.i: + ret i32 %counter.0.i.i + +where.exit: + br label %while.end.i + +while.end.i: + ret i32 %v3 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} diff --git a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll index 4136486fef..f7a82f6616 100644 --- a/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll +++ b/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s 
-loop-reduce -S | \ -; RUN: not grep {bitcast i32 1 to i32} +; RUN: not grep "bitcast i32 1 to i32" ; END. ; The setlt wants to use a value that is incremented one more than the dominant ; IV. Don't insert the 1 outside the loop, preventing folding it into the add. diff --git a/test/Transforms/LoopStrengthReduce/dont_reverse.ll b/test/Transforms/LoopStrengthReduce/dont_reverse.ll index 4c5db04b21..d65213d06a 100644 --- a/test/Transforms/LoopStrengthReduce/dont_reverse.ll +++ b/test/Transforms/LoopStrengthReduce/dont_reverse.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-reduce -S \ -; RUN: | grep {icmp eq i2 %lsr.iv.next, %xmp4344} +; RUN: | grep "icmp eq i2 %lsr.iv.next, %xmp4344" ; Don't reverse the iteration if the rhs of the compare is defined ; inside the loop. diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll index 2ca678761f..5c18809e59 100644 --- a/test/Transforms/LoopStrengthReduce/invariant_value_first.ll +++ b/test/Transforms/LoopStrengthReduce/invariant_value_first.ll @@ -1,6 +1,6 @@ ; Check that the index of 'P[outer]' is pulled out of the loop. ; RUN: opt < %s -loop-reduce -S | \ -; RUN: not grep {getelementptr.*%outer.*%INDVAR} +; RUN: not grep "getelementptr.*%outer.*%INDVAR" target datalayout = "e-p:32:32:32-n8:16:32" declare i1 @pred() diff --git a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll index 86c4d915a0..8eb8f05a8a 100644 --- a/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll +++ b/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll @@ -1,6 +1,6 @@ ; Check that the index of 'P[outer]' is pulled out of the loop. 
; RUN: opt < %s -loop-reduce -S | \ -; RUN: not grep {getelementptr.*%outer.*%INDVAR} +; RUN: not grep "getelementptr.*%outer.*%INDVAR" target datalayout = "e-p:32:32:32-n32" declare i1 @pred() diff --git a/test/Transforms/LoopStrengthReduce/pr2570.ll b/test/Transforms/LoopStrengthReduce/pr2570.ll index 80efb9f87e..7b569713a9 100644 --- a/test/Transforms/LoopStrengthReduce/pr2570.ll +++ b/test/Transforms/LoopStrengthReduce/pr2570.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-reduce -S | grep {phi\\>} | count 8 +; RUN: opt < %s -loop-reduce -S | grep "phi\>" | count 8 ; PR2570 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll index 59f14fcd1c..011824116b 100644 --- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll +++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)} +; RUN: opt < %s -analyze -iv-users | grep "{1,+,3,+,2}<%loop> (post-inc with loop %loop)" ; The value of %r is dependent on a polynomial iteration expression. diff --git a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll index 5ed37dd6a2..005e4c6b22 100644 --- a/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll +++ b/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-reduce -S | \ -; RUN: grep {add i32 %indvar630.ui, 1} +; RUN: grep "add i32 %indvar630.ui, 1" ; ; Make sure that the use of the IV outside of the loop (the store) uses the ; post incremented value of the IV, not the preincremented value. 
This diff --git a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll index 64ef4f951f..3405b26f1f 100644 --- a/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll +++ b/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll @@ -1,10 +1,10 @@ ; Base should not be i*3, it should be i*2. ; RUN: opt < %s -loop-reduce -S | \ -; RUN: not grep {mul.*%i, 3} +; RUN: not grep "mul.*%i, 3" ; Indvar should not start at zero: ; RUN: opt < %s -loop-reduce -S | \ -; RUN: not grep {phi i32 .* 0} +; RUN: not grep "phi i32 .* 0" ; END. ; mul uint %i, 3 diff --git a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll index 20f2c2bfd7..9d73d31d50 100644 --- a/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll +++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unswitch -stats -disable-output |& grep "1 loop-unswitch - Number of branches unswitched" | count 1 +; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | grep "1 loop-unswitch - Number of branches unswitched" | count 1 ; PR 3170 define i32 @a(i32 %x, i32 %y) nounwind { entry: diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll index 8389fe4643..c1fd588106 100644 --- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll @@ -35,11 +35,11 @@ ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us ; CHECK-NEXT: %var_val.us2 = load i32* %var ; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [ -; CHECK-NEXT: i32 1, label %inc.us3 -; CHECK-NEXT: i32 2, label %dec.us4 +; CHECK-NEXT: i32 1, label %inc.us4 +; CHECK-NEXT: i32 2, label %dec.us3 ; CHECK-NEXT: ] -; CHECK: dec.us4: ; preds = %loop_begin.us1 +; CHECK: dec.us3: ; preds = %loop_begin.us1 ; 
CHECK-NEXT: call void @decf() noreturn nounwind ; CHECK-NEXT: br label %loop_begin.backedge.us5 @@ -81,7 +81,7 @@ inc: dec: call void @decf() noreturn nounwind br label %loop_begin -default: +default: br label %loop_exit loop_exit: ret i32 0 diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll index 05d98d513e..f3db471199 100644 --- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll @@ -19,15 +19,15 @@ ; CHECK: switch i32 1, label %second_switch.us [ ; CHECK-NEXT: i32 1, label %inc.us -; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us -; CHECK-NEXT: call void @incf() noreturn nounwind -; CHECK-NEXT: br label %loop_begin.backedge.us - ; CHECK: second_switch.us: ; preds = %loop_begin.us ; CHECK-NEXT: switch i32 %d, label %default.us [ ; CHECK-NEXT: i32 1, label %inc.us ; CHECK-NEXT: ] +; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us + ; CHECK: .split: ; preds = %..split_crit_edge ; CHECK-NEXT: br label %loop_begin @@ -73,7 +73,7 @@ inc: call void @incf() noreturn nounwind br label %loop_begin -default: +default: br label %loop_begin loop_exit: diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll index 1b186d6bec..270899642f 100644 --- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll @@ -25,14 +25,14 @@ ; CHECK-NEXT: switch i32 1, label %second_switch.us.us [ ; CHECK-NEXT: i32 1, label %inc.us.us -; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us -; CHECK-NEXT: call void @incf() noreturn nounwind -; CHECK-NEXT: br label %loop_begin.backedge.us.us - ; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us ; 
CHECK-NEXT: switch i32 1, label %default.us.us [ ; CHECK-NEXT: i32 1, label %inc.us.us +; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us.us + ; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge ; CHECK-NEXT: br label %loop_begin.us @@ -41,10 +41,6 @@ ; CHECK-NEXT: switch i32 1, label %second_switch.us [ ; CHECK-NEXT: i32 1, label %inc.us -; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us -; CHECK-NEXT: call void @incf() noreturn nounwind -; CHECK-NEXT: br label %loop_begin.backedge.us - ; CHECK: second_switch.us: ; preds = %loop_begin.us ; CHECK-NEXT: switch i32 %d, label %default.us [ ; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge @@ -53,6 +49,10 @@ ; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us ; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us +; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us + ; CHECK: .split: ; preds = %..split_crit_edge ; CHECK-NEXT: %3 = icmp eq i32 %d, 1 ; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge @@ -65,21 +65,21 @@ ; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us ; CHECK-NEXT: %var_val.us2 = load i32* %var -; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [ +; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [ ; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us ; CHECK-NEXT: ] -; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4 -; CHECK-NEXT: call void @incf() noreturn nounwind -; CHECK-NEXT: br label %loop_begin.backedge.us6 - -; CHECK: second_switch.us4: ; preds = %loop_begin.us1 +; CHECK: second_switch.us3: ; preds = %loop_begin.us1 ; CHECK-NEXT: switch i32 1, label %default.us5 [ -; 
CHECK-NEXT: i32 1, label %inc.us3 +; CHECK-NEXT: i32 1, label %inc.us4 ; CHECK-NEXT: ] +; CHECK: inc.us4: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3 +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us6 + ; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1 -; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3 +; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4 ; CHECK: .split.split: ; preds = %.split..split.split_crit_edge ; CHECK-NEXT: br label %loop_begin @@ -127,7 +127,7 @@ inc: call void @incf() noreturn nounwind br label %loop_begin -default: +default: br label %loop_begin loop_exit: diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll index b95ad91a36..d124be5f90 100644 --- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep {call.*initialize} | not grep memtmp +; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp ; PR2077 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll index 24cf576a08..61ba3c7e6c 100644 --- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.} +; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy." 
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" %a = type { i32 } diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll index 8eac7da798..1bbb5fe865 100644 --- a/test/Transforms/MemCpyOpt/sret.ll +++ b/test/Transforms/MemCpyOpt/sret.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy} +; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" diff --git a/test/Transforms/MergeFunc/fold-weak.ll b/test/Transforms/MergeFunc/fold-weak.ll index 23e4d33c3a..4df6e39c12 100644 --- a/test/Transforms/MergeFunc/fold-weak.ll +++ b/test/Transforms/MergeFunc/fold-weak.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -mergefunc -S > %t -; RUN: grep {define weak} %t | count 2 -; RUN: grep {call} %t | count 2 +; RUN: grep "define weak" %t | count 2 +; RUN: grep "call" %t | count 2 ; XFAIL: * ; This test is off for a bit as we change this particular sort of folding to diff --git a/test/Transforms/MergeFunc/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll index 7b2a2fe5d5..fd0baffb31 100644 --- a/test/Transforms/MergeFunc/phi-speculation1.ll +++ b/test/Transforms/MergeFunc/phi-speculation1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mergefunc -stats -disable-output |& not grep {functions merged} +; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged" define i32 @foo1(i32 %x) { entry: diff --git a/test/Transforms/MergeFunc/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll index f080191ef8..eec8b5c5a9 100644 --- a/test/Transforms/MergeFunc/phi-speculation2.ll +++ b/test/Transforms/MergeFunc/phi-speculation2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -mergefunc -stats -disable-output |& grep {functions merged} +; RUN: opt < 
%s -mergefunc -stats -disable-output 2>&1 | grep "functions merged" define i32 @foo1(i32 %x) { entry: diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll index 6954fcec3d..4af079f8cd 100644 --- a/test/Transforms/MergeFunc/vector.ll +++ b/test/Transforms/MergeFunc/vector.ll @@ -1,4 +1,4 @@ -; RUN: opt -mergefunc -stats -disable-output < %s |& grep {functions merged} +; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged" ; This test is checks whether we can merge ; vector<intptr_t>::push_back(0) diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll index 69fa837675..1510ed00e6 100644 --- a/test/Transforms/ObjCARC/contract-testcases.ll +++ b/test/Transforms/ObjCARC/contract-testcases.ll @@ -4,17 +4,17 @@ %0 = type opaque %1 = type opaque %2 = type { i64, i64 } -%3 = type { i8*, i8* } %4 = type opaque declare %0* @"\01-[NSAttributedString(Terminal) pathAtIndex:effectiveRange:]"(%1*, i8* nocapture, i64, %2*) optsize declare i8* @objc_retainAutoreleasedReturnValue(i8*) -declare i8* @objc_msgSend_fixup(i8*, %3*, ...) +declare i8* @objc_msgSend_fixup(i8*, i8*, ...) +declare i8* @objc_msgSend(i8*, i8*, ...) declare void @objc_release(i8*) declare %2 @NSUnionRange(i64, i64, i64, i64) optsize declare i8* @objc_autoreleaseReturnValue(i8*) declare i8* @objc_autorelease(i8*) -declare i8* @objc_msgSend() nonlazybind +declare i32 @__gxx_personality_sj0(...) ; Don't get in trouble on bugpointed code. 
@@ -52,7 +52,7 @@ bb6: ; preds = %bb5, %bb4, %bb4, %b ; CHECK: %tmp8 = phi %0* [ %0, %bb ], [ %0, %bb ] define void @test1() { bb: - %tmp = tail call %0* bitcast (i8* ()* @objc_msgSend to %0* ()*)() + %tmp = tail call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* ()*)() %tmp2 = bitcast %0* %tmp to i8* %tmp3 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tmp2) nounwind br i1 undef, label %bb7, label %bb7 @@ -61,3 +61,30 @@ bb7: ; preds = %bb6, %bb6, %bb5 %tmp8 = phi %0* [ %tmp, %bb ], [ %tmp, %bb ] unreachable } + +; When looking for the defining instruction for an objc_retainAutoreleasedReturnValue +; call, handle the case where it's an invoke in a different basic block. +; rdar://11714057 + +; CHECK: define void @_Z6doTestP8NSString() { +; CHECK: invoke.cont: ; preds = %entry +; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""() +; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind +define void @_Z6doTestP8NSString() { +entry: + %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)() + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind + unreachable + +lpad: ; preds = %entry + %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + cleanup + resume { i8*, i32 } undef +} + +!clang.arc.retainAutoreleasedReturnValueMarker = !{!0} + +!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"} diff --git a/test/Transforms/PruneEH/simplenoreturntest.ll b/test/Transforms/PruneEH/simplenoreturntest.ll index 61e2f15c0d..ec5d100d75 100644 --- a/test/Transforms/PruneEH/simplenoreturntest.ll +++ b/test/Transforms/PruneEH/simplenoreturntest.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -prune-eh -S | not grep {ret i32} +; RUN: opt < %s -prune-eh -S | not grep "ret i32" 
declare void @noreturn() noreturn diff --git a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll index 33e44d4ba7..f66148bb4a 100644 --- a/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll +++ b/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -reassociate -instcombine -S |\ -; RUN: grep {ret i32 0} +; RUN: grep "ret i32 0" define i32 @f(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %tmp.2 = add i32 %a4, %a3 ; <i32> [#uses=1] diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll index 601b97647d..ce586e15fb 100644 --- a/test/Transforms/Reassociate/crash.ll +++ b/test/Transforms/Reassociate/crash.ll @@ -83,3 +83,64 @@ define i128 @foo() { %mul = mul i128 0, 0 ret i128 %mul } + +define void @infinite_loop() { +entry: + br label %loop +loop: + %x = phi i32 [undef, %entry], [%x, %loop] + %dead = add i32 %x, 0 + br label %loop +unreachable1: + %y1 = add i32 %y1, 0 + %z1 = add i32 %y1, 0 + ret void +unreachable2: + %y2 = add i32 %y2, 0 + %z2 = add i32 %y2, %y2 + ret void +unreachable3: + %y3 = add i32 %y3, %y3 + %z3 = add i32 %y3, 0 + ret void +unreachable4: + %y4 = add i32 %y4, %y4 + %z4 = add i32 %y4, %y4 + ret void +} + +; PR13185 +define void @pr13185(i16 %p) { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %x.0 = phi i32 [ undef, %entry ], [ %conv, %for.cond ] + %conv = zext i16 %p to i32 + br label %for.cond +} + +; PR12963 +@a = external global i8 +define i8 @f0(i8 %x) { + %t0 = load i8* @a + %t1 = mul i8 %x, %x + %t2 = mul i8 %t1, %t1 + %t3 = mul i8 %t2, %t2 + %t4 = mul i8 %t3, %x + %t5 = mul i8 %t4, %t4 + %t6 = mul i8 %t5, %x + %t7 = mul i8 %t6, %t0 + ret i8 %t7 +} + +define i32 @sozefx_(i32 %x, i32 %y) { + %t0 = sub i32 %x, %x + %t1 = mul i32 %t0, %t0 + %t2 = mul i32 %x, %t0 + %t3 = mul i32 %t1, %t1 + %t4 = add i32 %t2, %t3 + %t5 = mul i32 %x, %y + %t6 = add i32 %t4, %t5 + ret i32 
%t6 +} diff --git a/test/Transforms/Reassociate/shifttest.ll b/test/Transforms/Reassociate/shifttest.ll index 8b2cbc98c4..d9a5336fbf 100644 --- a/test/Transforms/Reassociate/shifttest.ll +++ b/test/Transforms/Reassociate/shifttest.ll @@ -1,7 +1,7 @@ ; With shl->mul reassociation, we can see that this is (shl A, 9) * A ; ; RUN: opt < %s -reassociate -instcombine -S |\ -; RUN: grep {shl .*, 9} +; RUN: grep "shl .*, 9" define i32 @test(i32 %A, i32 %B) { %X = shl i32 %A, 5 ; <i32> [#uses=1] diff --git a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll index 4adfde3bfe..c847b4eaca 100644 --- a/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll +++ b/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sccp -S | grep {ret i32 1} +; RUN: opt < %s -sccp -S | grep "ret i32 1" ; This function definitely returns 1, even if we don't know the direction ; of the branch. diff --git a/test/Transforms/SCCP/2006-12-19-UndefBug.ll b/test/Transforms/SCCP/2006-12-19-UndefBug.ll index ec69ce05fe..ede1a32c5f 100644 --- a/test/Transforms/SCCP/2006-12-19-UndefBug.ll +++ b/test/Transforms/SCCP/2006-12-19-UndefBug.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -sccp -S | \ -; RUN: grep {ret i1 false} +; RUN: grep "ret i1 false" define i1 @foo() { %X = and i1 false, undef ; <i1> [#uses=1] diff --git a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll index a40455cf5d..e7168dda08 100644 --- a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll +++ b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sccp -S | grep {ret i32 %Z} +; RUN: opt < %s -sccp -S | grep "ret i32 %Z" ; rdar://5778210 declare {i32, i32} @bar(i32 %A) diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll index 63f41dbc02..4688643ebd 100644 --- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll +++ 
b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sccp -S | not grep {ret i32 undef} +; RUN: opt < %s -sccp -S | not grep "ret i32 undef" ; PR2358 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll index f62ed7048e..c05f897f1a 100644 --- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll +++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -ipsccp -S | grep {ret i32 42} -; RUN: opt < %s -ipsccp -S | grep {ret i32 undef} +; RUN: opt < %s -ipsccp -S | grep "ret i32 42" +; RUN: opt < %s -ipsccp -S | grep "ret i32 undef" ; PR3325 define i32 @main() { diff --git a/test/Transforms/SCCP/apint-array.ll b/test/Transforms/SCCP/apint-array.ll index 1e75878f3a..888b9e1d22 100644 --- a/test/Transforms/SCCP/apint-array.ll +++ b/test/Transforms/SCCP/apint-array.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sccp -S | grep {ret i101 12} +; RUN: opt < %s -sccp -S | grep "ret i101 12" @Y = constant [6 x i101] [ i101 12, i101 123456789000000, i101 -12,i101 -123456789000000, i101 0,i101 9123456789000000] diff --git a/test/Transforms/SCCP/apint-basictest4.ll b/test/Transforms/SCCP/apint-basictest4.ll index 862426020e..572f97c572 100644 --- a/test/Transforms/SCCP/apint-basictest4.ll +++ b/test/Transforms/SCCP/apint-basictest4.ll @@ -4,7 +4,7 @@ ; RUN: opt < %s -sccp -S | not grep and ; RUN: opt < %s -sccp -S | not grep trunc -; RUN: opt < %s -sccp -S | grep {ret i100 -1} +; RUN: opt < %s -sccp -S | grep "ret i100 -1" define i100 @test(i133 %A) { %B = and i133 0, %A diff --git a/test/Transforms/SCCP/apint-ipsccp1.ll b/test/Transforms/SCCP/apint-ipsccp1.ll index fda40f53fe..f6f18fe66f 100644 --- a/test/Transforms/SCCP/apint-ipsccp1.ll +++ b/test/Transforms/SCCP/apint-ipsccp1.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -ipsccp -S | grep 
-v {ret i512 undef} | \ -; RUN: grep {ret i8 2} +; RUN: opt < %s -ipsccp -S | grep -v "ret i512 undef" | \ +; RUN: grep "ret i8 2" define internal i512 @test(i1 %B) { br i1 %B, label %BB1, label %BB2 diff --git a/test/Transforms/SCCP/apint-ipsccp2.ll b/test/Transforms/SCCP/apint-ipsccp2.ll index 3c02e05548..834cca4884 100644 --- a/test/Transforms/SCCP/apint-ipsccp2.ll +++ b/test/Transforms/SCCP/apint-ipsccp2.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -ipsccp -S | grep -v {ret i101 0} | \ -; RUN: grep -v {ret i101 undef} | not grep ret +; RUN: opt < %s -ipsccp -S | grep -v "ret i101 0" | \ +; RUN: grep -v "ret i101 undef" | not grep ret define internal i101 @bar(i101 %A) { diff --git a/test/Transforms/SCCP/logical-nuke.ll b/test/Transforms/SCCP/logical-nuke.ll index b3d845c7ee..45f6f44a0e 100644 --- a/test/Transforms/SCCP/logical-nuke.ll +++ b/test/Transforms/SCCP/logical-nuke.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sccp -S | grep {ret i32 0} +; RUN: opt < %s -sccp -S | grep "ret i32 0" ; Test that SCCP has basic knowledge of when and/or nuke overdefined values. diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll index eb1c945e34..0b5e4152c4 100644 --- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll +++ b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll @@ -1,7 +1,7 @@ ; Scalar replacement was incorrectly promoting this alloca!! 
; ; RUN: opt < %s -scalarrepl -S | \ -; RUN: sed {s/;.*//g} | grep {\\\[} +; RUN: sed "s/;.*//g" | grep "\[" define i8* @test() { %A = alloca [30 x i8] ; <[30 x i8]*> [#uses=1] diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll index 00e43a7904..77c7b54b63 100644 --- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll +++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -S | grep {alloca %%T} +; RUN: opt < %s -scalarrepl -S | grep "alloca %%T" %T = type { [80 x i8], i32, i32 } declare i32 @.callback_1(i8*) diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll index 8bc4ff0b3f..a53f3deadc 100644 --- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll +++ b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i8 17} +; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17" ; rdar://5707076 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin9.1.0" diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll index 71ba601833..f597613ef2 100644 --- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll +++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -S | grep {call.*mem} +; RUN: opt < %s -scalarrepl -S | grep "call.*mem" ; PR2369 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll index 7cccb19714..b2a9d43b22 100644 --- 
a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll +++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -S | grep {s = alloca .struct.x} +; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x" ; PR2423 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" diff --git a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll index 9c0f2030c0..3c8a364edc 100644 --- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll +++ b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i32 %x} +; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i386-pc-linux-gnu" diff --git a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll index f8ab875bac..67228a7a3c 100644 --- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll +++ b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep {ret i32 42} +; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42" ; PR3489 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "x86_64-apple-darwin10.0" diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll index 3218d599d1..a4182d4c1d 100644 --- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll +++ 
b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll @@ -1,6 +1,6 @@ ; The store into %p should end up with a known alignment of 1, since the memcpy ; is only known to access it with 1-byte alignment. -; RUN: opt < %s -scalarrepl -S | grep {store i16 1, .*, align 1} +; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1" ; PR3720 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll index 98fa1c684c..459688547f 100644 --- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll +++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll @@ -10,8 +10,7 @@ target triple = "x86_64-apple-macosx10.7.0" ; CHECK: main ; CHECK-NOT: alloca -; CHECK: %[[A:[a-z0-9]*]] = and i128 -; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32 +; CHECK: extractelement <2 x float> zeroinitializer, i32 0 define void @main() uwtable ssp { entry: @@ -28,8 +27,7 @@ entry: ; CHECK: test1 ; CHECK-NOT: alloca -; CHECK: %[[A:[a-z0-9]*]] = and i128 -; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32 +; CHECK: extractelement <2 x float> zeroinitializer, i32 0 define void @test1() uwtable ssp { entry: @@ -43,9 +41,8 @@ entry: ; CHECK: test2 ; CHECK-NOT: alloca -; CHECK: and i128 -; CHECK: or i128 -; CHECK: trunc i128 +; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0 +; CHECK: fadd float %[[A]], 1.000000e+00 ; CHECK-NOT: insertelement ; CHECK-NOT: extractelement @@ -62,3 +59,17 @@ entry: %r = fadd float %r1, %r2 ret float %r } + +; CHECK: test3 +; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> <float 2.000000e+00, float 3.000000e+00>, i32 1 +; CHECK: ret float %[[A]] + +define float @test3() { +entry: + %ai = alloca { <2 x float>, <2 x float> }, align 8 + store { <2 x float>, <2 x float> } {<2 x float> <float 0.0, float 1.0>, <2 x 
float> <float 2.0, float 3.0>}, { <2 x float>, <2 x float> }* %ai, align 8 + %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]* + %arrayidx = getelementptr inbounds [4 x float]* %tmpcast, i64 0, i64 3 + %f = load float* %arrayidx, align 4 + ret float %f +} diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll new file mode 100644 index 0000000000..565cd76164 --- /dev/null +++ b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll @@ -0,0 +1,167 @@ +; RUN: opt < %s -scalarrepl -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "x86_64-apple-darwin10.0.0" + +; CHECK: @test1 +; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float> +; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]] +; CHECK: memset +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 + +; Split the array but don't replace the memset with an insert +; element as its not a constant offset. +; The load, however, can be replaced with an extract element. +define float @test1(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + %cast = bitcast float* %ptr1 to i8* + call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false) + %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: @test2 +; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2 + +; Do SROA on the array when it has dynamic vector reads and writes. 
+define float @test2(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: test3 +; CHECK: %0 = alloca [4 x <4 x float>] +; CHECK-NOT: alloca + +; Don't do SROA on a dynamically indexed vector when it spans +; more than one array element of the alloca array it is within. +define float @test3(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* + %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: test4 +; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <16 x float> %0, i32 %idx2 + +; Don't do SROA on a dynamically indexed vector when it spans +; more than one array element of the alloca array it is within. 
+; However, unlike test3, the store is on the vector type +; so SROA will convert the large alloca into the large vector +; type and do all accesses with insert/extract element +define float @test4(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>* + store <16 x float> zeroinitializer, <16 x float>* %bigvec + %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: @test5 +; CHECK: %0 = alloca [4 x <4 x float>] +; CHECK-NOT: alloca + +; Don't do SROA as the is a second dynamically indexed array +; which may span multiple elements of the alloca. +define float @test5(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca [4 x <4 x float>] + store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0 + %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1 + %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]* + %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr4 + ret float %ret +} + +; CHECK: test6 +; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 + +%vector.pair = type { %vector.anon, %vector.anon } +%vector.anon = type { %vector } +%vector = type { <4 x float> } + +; Dynamic GEPs on vectors were crashing when the vector was inside a struct +; as the new GEP for the new alloca might not include all the indices from +; the original GEP, just the indices it needs to get to the correct offset of +; some type, not necessarily the dynamic vector. +; This test makes sure we don't have this crash. 
+define float @test6(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca %vector.pair + store %vector.pair zeroinitializer, %vector.pair* %0 + %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: test7 +; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1 +; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2 + +%array.pair = type { [2 x %array.anon], %array.anon } +%array.anon = type { [2 x %vector] } + +; This is the same as test6 and tests the same crash, but on arrays. +define float @test7(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca %array.pair + store %array.pair zeroinitializer, %array.pair* %0 + %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1 + store float 1.0, float* %ptr1 + %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2 + %ret = load float* %ptr2 + ret float %ret +} + +; CHECK: test8 +; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1 +; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]] +; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2 +; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]] + +; Do SROA on the vector when it has dynamic vector reads and writes +; from a non-zero offset. 
+define float @test8(i32 %idx1, i32 %idx2) { +entry: + %0 = alloca <4 x float> + store <4 x float> zeroinitializer, <4 x float>* %0 + %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1 + %ptr2 = bitcast float* %ptr1 to <3 x float>* + %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1 + store float 1.0, float* %ptr3 + %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2 + %ptr5 = bitcast float* %ptr4 to <2 x float>* + %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2 + %ret = load float* %ptr6 + ret float %ret +} + +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll index 0d61e5aab6..3510dfc24d 100644 --- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll +++ b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll @@ -1,6 +1,6 @@ ; PR1226 ; RUN: opt < %s -scalarrepl -S | \ -; RUN: not grep {call void @llvm.memcpy.i32} +; RUN: not grep "call void @llvm.memcpy.i32" ; RUN: opt < %s -scalarrepl -S | grep getelementptr ; END. 
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll index 42e7a0ffdc..95ecf175ee 100644 --- a/test/Transforms/ScalarRepl/memset-aggregate.ll +++ b/test/Transforms/ScalarRepl/memset-aggregate.ll @@ -1,7 +1,7 @@ ; PR1226 -; RUN: opt < %s -scalarrepl -S | grep {ret i32 16843009} +; RUN: opt < %s -scalarrepl -S | grep "ret i32 16843009" ; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret i16 514} +; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i16 514" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll index f873456b3c..67fefb4841 100644 --- a/test/Transforms/ScalarRepl/not-a-vector.ll +++ b/test/Transforms/ScalarRepl/not-a-vector.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -scalarrepl -S | not grep alloca -; RUN: opt < %s -scalarrepl -S | not grep {7 x double} -; RUN: opt < %s -scalarrepl -instcombine -S | grep {ret double %B} +; RUN: opt < %s -scalarrepl -S | not grep "7 x double" +; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret double %B" target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" define double @test(double %A, double %B) { diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll index 8b7e50df31..6a49918871 100644 --- a/test/Transforms/ScalarRepl/union-fp-int.ll +++ b/test/Transforms/ScalarRepl/union-fp-int.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -scalarrepl -S | \ ; RUN: not grep alloca ; RUN: opt < %s -scalarrepl -S | \ -; RUN: grep {bitcast.*float.*i32} +; RUN: grep "bitcast.*float.*i32" target datalayout = 
"E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" define i32 @test(float %X) { diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll index ea4ec14e56..03d25ac708 100644 --- a/test/Transforms/ScalarRepl/union-pointer.ll +++ b/test/Transforms/ScalarRepl/union-pointer.ll @@ -1,7 +1,7 @@ ; PR892 ; RUN: opt < %s -scalarrepl -S | \ ; RUN: not grep alloca -; RUN: opt < %s -scalarrepl -S | grep {ret i8} +; RUN: opt < %s -scalarrepl -S | grep "ret i8" target datalayout = "e-p:32:32-n8:16:32" target triple = "i686-apple-darwin8.7.2" diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll index decbd301b8..33e8034f57 100644 --- a/test/Transforms/ScalarRepl/vector_memcpy.ll +++ b/test/Transforms/ScalarRepl/vector_memcpy.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -scalarrepl -S > %t -; RUN: grep {ret <16 x float> %A} %t -; RUN: grep {ret <16 x float> zeroinitializer} %t +; RUN: grep "ret <16 x float> %A" %t +; RUN: grep "ret <16 x float> zeroinitializer" %t target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" define <16 x float> @foo(<16 x float> %A) nounwind { diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll index fadf1aa276..056526cbd9 100644 --- a/test/Transforms/ScalarRepl/volatile.ll +++ b/test/Transforms/ScalarRepl/volatile.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -scalarrepl -S | grep {load volatile} -; RUN: opt < %s -scalarrepl -S | grep {store volatile} +; RUN: opt < %s -scalarrepl -S | grep "load volatile" +; RUN: opt < %s -scalarrepl -S | grep "store volatile" define i32 @voltest(i32 %T) { %A = alloca {i32, i32} diff --git a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll index 414235ba7c..feffb4e4c8 100644 --- 
a/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll +++ b/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll @@ -1,7 +1,7 @@ ; Basic block #2 should not be merged into BB #3! ; ; RUN: opt < %s -simplifycfg -S | \ -; RUN: grep {br label} +; RUN: grep "br label" ; declare void @foo() diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll index 8ac9ae4437..fc89b165f8 100644 --- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll +++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll @@ -2,7 +2,7 @@ ; 'br Dest' ; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {br i1 %c2} +; RUN: not grep "br i1 %c2" declare void @noop() diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll index 888e187b6b..c1b032fb8b 100644 --- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll +++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll @@ -4,7 +4,7 @@ ; the ConstantFoldTerminator function. ; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {br i1 %c2} +; RUN: not grep "br i1 %c2" declare void @noop() diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll index 00f2d5bcf1..14baeea4b0 100644 --- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll +++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll @@ -1,5 +1,5 @@ ; The phi should not be eliminated in this case, because the fp op could trap. 
-; RUN: opt < %s -simplifycfg -S | grep {= phi double} +; RUN: opt < %s -simplifycfg -S | grep "= phi double" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll index 56f43b64f7..13ccad6a1e 100644 --- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll +++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -simplifycfg -S > %t -; RUN: not grep {^BB.tomerge} %t -; RUN: grep {^BB.nomerge} %t | count 2 +; RUN: not grep "^BB.tomerge" %t +; RUN: grep "^BB.nomerge" %t | count 2 ; ModuleID = '<stdin>' declare i1 @foo() diff --git a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll index d025dee85f..9b6084f0e1 100644 --- a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll +++ b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplifycfg -S | grep {%outval = phi i32 .*mux} +; RUN: opt < %s -simplifycfg -S | grep "%outval = phi i32 .*mux" ; PR2540 ; Outval should end up with a select from 0/2, not all constants. 
diff --git a/test/Transforms/SimplifyCFG/BrUnwind.ll b/test/Transforms/SimplifyCFG/BrUnwind.ll index 7ab8faa2ce..14853642c0 100644 --- a/test/Transforms/SimplifyCFG/BrUnwind.ll +++ b/test/Transforms/SimplifyCFG/BrUnwind.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {br label} +; RUN: not grep "br label" define void @test(i1 %C) { br i1 %C, label %A, label %B diff --git a/test/Transforms/SimplifyCFG/DeadSetCC.ll b/test/Transforms/SimplifyCFG/DeadSetCC.ll index 83394628cc..c62560000c 100644 --- a/test/Transforms/SimplifyCFG/DeadSetCC.ll +++ b/test/Transforms/SimplifyCFG/DeadSetCC.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {icmp eq} +; RUN: not grep "icmp eq" ; Check that simplifycfg deletes a dead 'seteq' instruction when it ; folds a conditional branch into a switch instruction. diff --git a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll index bf9d9535d6..b6d54d3256 100644 --- a/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll +++ b/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll @@ -3,7 +3,7 @@ ; important case. This is basically the most trivial form of tail-duplication. 
; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {br label} +; RUN: not grep "br label" define i32 @test(i1 %B, i32 %A, i32 %B.upgrd.1) { br i1 %B, label %T, label %F diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll index 70c5fb5db2..7097dea424 100644 --- a/test/Transforms/SimplifyCFG/branch-fold.ll +++ b/test/Transforms/SimplifyCFG/branch-fold.ll @@ -50,3 +50,21 @@ c: %o2 = phi i1 [ false, %a ], [ %phitmp, %b ], [ false, %entry ] ret i1 %o2 } + +; PR13180 +define void @pr13180(i8 %p) { +entry: + %tobool = icmp eq i8 %p, 0 + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + %phitmp = icmp eq i8 %p, 0 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i1 [ undef, %cond.true ], [ %phitmp, %cond.false ] + unreachable +} diff --git a/test/Transforms/SimplifyCFG/branch-phi-thread.ll b/test/Transforms/SimplifyCFG/branch-phi-thread.ll index f52d979ecd..c19ba69866 100644 --- a/test/Transforms/SimplifyCFG/branch-phi-thread.ll +++ b/test/Transforms/SimplifyCFG/branch-phi-thread.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -simplifycfg -adce -S | \ -; RUN: not grep {call void @f1} +; RUN: not grep "call void @f1" ; END. declare void @f1() diff --git a/test/Transforms/SimplifyCFG/duplicate-phis.ll b/test/Transforms/SimplifyCFG/duplicate-phis.ll index 5129f9fb6d..4788406fc0 100644 --- a/test/Transforms/SimplifyCFG/duplicate-phis.ll +++ b/test/Transforms/SimplifyCFG/duplicate-phis.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -instcombine -simplifycfg -S | grep { = phi } | count 1 +; RUN: opt < %s -instcombine -simplifycfg -S | grep " = phi " | count 1 ; instcombine should sort the PHI operands so that simplifycfg can see the ; duplicate and remove it. 
diff --git a/test/Transforms/SimplifyCFG/invoke.ll b/test/Transforms/SimplifyCFG/invoke.ll new file mode 100644 index 0000000000..ddced6b94d --- /dev/null +++ b/test/Transforms/SimplifyCFG/invoke.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_call_unexpected(i8*) +declare i32 @read_only() nounwind readonly +declare i32 @nounwind_fn() nounwind +declare i32 @fn() + + +; CHECK: @f1 +define i8* @f1() nounwind uwtable ssp { +entry: +; CHECK: call void @llvm.trap() +; CHECK: unreachable + %call = invoke noalias i8* undef() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i8* %call + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } %0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +; CHECK: @f2 +define i8* @f2() nounwind uwtable ssp { +entry: +; CHECK: call void @llvm.trap() +; CHECK: unreachable + %call = invoke noalias i8* null() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i8* %call + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } %0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +; CHECK: @f3 +define i32 @f3() nounwind uwtable ssp { +; CHECK-NEXT: entry +entry: +; CHECK-NEXT: ret i32 3 + %call = invoke i32 @read_only() + to label %invoke.cont unwind label %lpad + +invoke.cont: + ret i32 3 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } 
%0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +; CHECK: @f4 +define i32 @f4() nounwind uwtable ssp { +; CHECK-NEXT: entry +entry: +; CHECK-NEXT: call i32 @read_only() + %call = invoke i32 @read_only() + to label %invoke.cont unwind label %lpad + +invoke.cont: +; CHECK-NEXT: ret i32 %call + ret i32 %call + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + %1 = extractvalue { i8*, i32 } %0, 0 + tail call void @__cxa_call_unexpected(i8* %1) noreturn nounwind + unreachable +} + +; CHECK: @f5 +define i32 @f5(i1 %cond, i8* %a, i8* %b) { +entry: + br i1 %cond, label %x, label %y + +x: +; CHECK: invoke i32 @fn() + %call = invoke i32 @fn() + to label %cont unwind label %lpad + +y: +; CHECK: call i32 @nounwind_fn() + %call2 = invoke i32 @nounwind_fn() + to label %cont unwind label %lpad + +cont: +; CHECK: phi i32 +; CHECK: ret i32 %phi + %phi = phi i32 [%call, %x], [%call2, %y] + ret i32 %phi + +lpad: +; CHECK-NOT: phi + %phi2 = phi i8* [%a, %x], [%b, %y] + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer +; CHECK: __cxa_call_unexpected(i8* %a) + tail call void @__cxa_call_unexpected(i8* %phi2) noreturn nounwind + unreachable +} diff --git a/test/Transforms/SimplifyCFG/switch_thread.ll b/test/Transforms/SimplifyCFG/switch_thread.ll index bd85fccd52..93966841a4 100644 --- a/test/Transforms/SimplifyCFG/switch_thread.ll +++ b/test/Transforms/SimplifyCFG/switch_thread.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -simplifycfg -S | \ -; RUN: not grep {call void @DEAD} +; RUN: not grep "call void @DEAD" ; Test that we can thread a simple known condition through switch statements. 
diff --git a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll index 8e9f2062cd..ae917f70f4 100644 --- a/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll +++ b/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll @@ -1,8 +1,8 @@ ; PR1307 ; RUN: opt < %s -simplify-libcalls -instcombine -S > %t -; RUN: grep {@str,.*i64 3} %t -; RUN: grep {@str1,.*i64 7} %t -; RUN: grep {ret i8.*null} %t +; RUN: grep "@str,.*i64 3" %t +; RUN: grep "@str1,.*i64 7" %t +; RUN: grep "ret i8.*null" %t ; END. @str = internal constant [5 x i8] c"foog\00" diff --git a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll index 9056499b4c..f4c80ed132 100644 --- a/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll +++ b/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -instcombine -S | grep {ret i32 -65} +; RUN: opt < %s -simplify-libcalls -instcombine -S | grep "ret i32 -65" ; PR4284 define i32 @test() nounwind { diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll index ab45f1819b..e38d78349d 100644 --- a/test/Transforms/SimplifyLibCalls/FFS.ll +++ b/test/Transforms/SimplifyLibCalls/FFS.ll @@ -1,6 +1,6 @@ ; Test that the ToAsciiOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*@ffs} +; RUN: not grep "call.*@ffs" @non_const = external global i32 ; <i32*> [#uses=1] diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll index 4a0d232dac..51733e4a1e 100644 --- a/test/Transforms/SimplifyLibCalls/FPrintF.ll +++ b/test/Transforms/SimplifyLibCalls/FPrintF.ll @@ -1,6 +1,6 @@ ; Test that the FPrintFOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*fprintf} +; RUN: not grep "call.*fprintf" ; 
This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll index 1f72ede796..aa01aba265 100644 --- a/test/Transforms/SimplifyLibCalls/FPuts.ll +++ b/test/Transforms/SimplifyLibCalls/FPuts.ll @@ -1,6 +1,6 @@ ; Test that the FPutsOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*fputs} +; RUN: not grep "call.*fputs" ; This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll index c711178fa1..1faad036a8 100644 --- a/test/Transforms/SimplifyLibCalls/MemCpy.ll +++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -constprop -instcombine -S | not grep {call.*llvm.memcpy.i32} +; RUN: opt < %s -constprop -instcombine -S | not grep "call.*llvm.memcpy.i32" @h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1] @hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1] diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll index 847e363f52..514a7d9f6e 100644 --- a/test/Transforms/SimplifyLibCalls/SPrintF.ll +++ b/test/Transforms/SimplifyLibCalls/SPrintF.ll @@ -1,6 +1,6 @@ ; Test that the SPrintFOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*sprintf} +; RUN: not grep "call.*sprintf" ; This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. 
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll index 4e3d0ab7f4..3ea691a3cf 100644 --- a/test/Transforms/SimplifyLibCalls/StrCat.ll +++ b/test/Transforms/SimplifyLibCalls/StrCat.ll @@ -1,9 +1,9 @@ ; Test that the StrCatOptimizer works correctly ; PR3661 ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*strcat} +; RUN: not grep "call.*strcat" ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: grep {puts.*%arg1} +; RUN: grep "puts.*%arg1" ; This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll index acd8aaf6e5..4a20bbd2ce 100644 --- a/test/Transforms/SimplifyLibCalls/StrLen.ll +++ b/test/Transforms/SimplifyLibCalls/StrLen.ll @@ -1,6 +1,6 @@ ; Test that the StrCatOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*strlen} +; RUN: not grep "call.*strlen" target datalayout = "e-p:32:32" @hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=3] diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll index d09c022fd4..073792b96a 100644 --- a/test/Transforms/SimplifyLibCalls/StrNCat.ll +++ b/test/Transforms/SimplifyLibCalls/StrNCat.ll @@ -1,8 +1,8 @@ ; Test that the StrNCatOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*strncat} +; RUN: not grep "call.*strncat" ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: grep {puts.*%arg1} +; RUN: grep "puts.*%arg1" ; This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. 
diff --git a/test/Transforms/SimplifyLibCalls/StrNCpy.ll b/test/Transforms/SimplifyLibCalls/StrNCpy.ll index c8af3ca8c3..4e47b31a6a 100644 --- a/test/Transforms/SimplifyLibCalls/StrNCpy.ll +++ b/test/Transforms/SimplifyLibCalls/StrNCpy.ll @@ -1,6 +1,6 @@ ; Test that the StrNCpyOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*strncpy} +; RUN: not grep "call.*strncpy" ; This transformation requires the pointer size, as it assumes that size_t is ; the size of a pointer. diff --git a/test/Transforms/SimplifyLibCalls/ToAscii.ll b/test/Transforms/SimplifyLibCalls/ToAscii.ll index e2b5683d9d..aef47333b3 100644 --- a/test/Transforms/SimplifyLibCalls/ToAscii.ll +++ b/test/Transforms/SimplifyLibCalls/ToAscii.ll @@ -1,6 +1,6 @@ ; Test that the ToAsciiOptimizer works correctly ; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep {call.*toascii} +; RUN: not grep "call.*toascii" declare i32 @toascii(i32) diff --git a/test/Transforms/SimplifyLibCalls/abs.ll b/test/Transforms/SimplifyLibCalls/abs.ll index 6fbe0b9de4..3934a5b98f 100644 --- a/test/Transforms/SimplifyLibCalls/abs.ll +++ b/test/Transforms/SimplifyLibCalls/abs.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | grep {select i1 %ispos} +; RUN: opt < %s -simplify-libcalls -S | grep "select i1 %ispos" ; PR2337 define i32 @test(i32 %x) { diff --git a/test/Transforms/SimplifyLibCalls/exp2.ll b/test/Transforms/SimplifyLibCalls/exp2.ll index 2f5d910558..a5927757cf 100644 --- a/test/Transforms/SimplifyLibCalls/exp2.ll +++ b/test/Transforms/SimplifyLibCalls/exp2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | grep {call.*ldexp} | count 4 +; RUN: opt < %s -simplify-libcalls -S | grep "call.*ldexp" | count 4 ; rdar://5852514 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" diff --git a/test/Transforms/SimplifyLibCalls/memmove.ll 
b/test/Transforms/SimplifyLibCalls/memmove.ll index c0c00506cd..5aaeeeb024 100644 --- a/test/Transforms/SimplifyLibCalls/memmove.ll +++ b/test/Transforms/SimplifyLibCalls/memmove.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memmove} +; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memmove" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i686-pc-linux-gnu" diff --git a/test/Transforms/SimplifyLibCalls/memset-64.ll b/test/Transforms/SimplifyLibCalls/memset-64.ll index fb752c4083..92412dee71 100644 --- a/test/Transforms/SimplifyLibCalls/memset-64.ll +++ b/test/Transforms/SimplifyLibCalls/memset-64.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset} +; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-pc-linux-gnu" diff --git a/test/Transforms/SimplifyLibCalls/memset.ll b/test/Transforms/SimplifyLibCalls/memset.ll index 0aede064ca..853215a4d2 100644 --- a/test/Transforms/SimplifyLibCalls/memset.ll +++ b/test/Transforms/SimplifyLibCalls/memset.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | grep {llvm.memset} +; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" target triple = "i686-pc-linux-gnu" diff --git a/test/Transforms/SimplifyLibCalls/pow2.ll b/test/Transforms/SimplifyLibCalls/pow2.ll index f8364f740b..f0964e7d6d 100644 --- a/test/Transforms/SimplifyLibCalls/pow2.ll +++ b/test/Transforms/SimplifyLibCalls/pow2.ll @@ -1,6 +1,6 @@ ; Testcase for calls to the standard C "pow" function ; -; RUN: opt < %s -simplify-libcalls -S | not grep {call .pow} +; RUN: opt < %s 
-simplify-libcalls -S | not grep "call .pow" declare double @pow(double, double) diff --git a/test/Transforms/TailCallElim/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll index 0c140ad681..5b5dbcc225 100644 --- a/test/Transforms/TailCallElim/ackermann.ll +++ b/test/Transforms/TailCallElim/ackermann.ll @@ -1,5 +1,5 @@ ; This function contains two tail calls, which should be eliminated -; RUN: opt < %s -tailcallelim -stats -disable-output |& grep {2 tailcallelim} +; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim" define i32 @Ack(i32 %M.1, i32 %N.1) { entry: diff --git a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll index 5cc92e1b17..e4f8b483c3 100644 --- a/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll +++ b/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -tailcallelim -S | \ -; RUN: grep {call i32 @foo} +; RUN: grep "call i32 @foo" declare void @bar(i32*) diff --git a/test/Transforms/TailCallElim/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll index 93638804f9..42ac2f9dc4 100644 --- a/test/Transforms/TailCallElim/dup_tail.ll +++ b/test/Transforms/TailCallElim/dup_tail.ll @@ -1,5 +1,5 @@ ; Duplicate the return into if.end to enable TCE. 
-; RUN: opt %s -tailcallelim -stats -disable-output |& grep {Number of return duplicated} +; RUN: opt %s -tailcallelim -stats -disable-output 2>&1 | grep "Number of return duplicated" define i32 @fib(i32 %n) nounwind ssp { entry: diff --git a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll index 3dddb013f7..3d01d17099 100644 --- a/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll +++ b/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -tailcallelim -S | \ -; RUN: grep {tail call void @foo} +; RUN: grep "tail call void @foo" declare void @foo() diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll index 03e99bc9bf..7853d7ba06 100644 --- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll +++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output |& not grep tailduplicate +; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate ; XFAIL: * define i32 @foo(i32 %l) nounwind { diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll index 4c1ddd1e3a..b361112711 100644 --- a/test/Verifier/2002-04-13-RetTypes.ll +++ b/test/Verifier/2002-04-13-RetTypes.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'i32'} +; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'i32'" ; Verify the the operand type of the ret instructions in a function match the ; delcared return type of the function they live in. 
diff --git a/test/Verifier/2002-11-05-GetelementptrPointers.ll b/test/Verifier/2002-11-05-GetelementptrPointers.ll index 1f71387ab3..108ae5f765 100644 --- a/test/Verifier/2002-11-05-GetelementptrPointers.ll +++ b/test/Verifier/2002-11-05-GetelementptrPointers.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {invalid getelementptr indices} +; RUN: not llvm-as < %s 2>&1 | grep "invalid getelementptr indices" ; This testcase is invalid because we are indexing into a pointer that is ; contained WITHIN a structure. diff --git a/test/Verifier/2006-07-11-StoreStruct.ll b/test/Verifier/2006-07-11-StoreStruct.ll index 80ab122d0b..65b229d1ca 100644 --- a/test/Verifier/2006-07-11-StoreStruct.ll +++ b/test/Verifier/2006-07-11-StoreStruct.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s |& not grep {Instruction operands must be first-class} +; RUN: llvm-as < %s 2>&1 | not grep "Instruction operands must be first-class" ; This previously was for PR826, but structs are now first-class so ; the following is now valid. 
diff --git a/test/Verifier/2006-10-15-AddrLabel.ll b/test/Verifier/2006-10-15-AddrLabel.ll index 0b73b47893..c8fedb5f19 100644 --- a/test/Verifier/2006-10-15-AddrLabel.ll +++ b/test/Verifier/2006-10-15-AddrLabel.ll @@ -1,4 +1,5 @@ -; RUN: not llvm-as < %s > /dev/null |& grep {basic block pointers are invalid} +; RUN: not llvm-as < %s > /dev/null 2> %t +; RUN: grep "basic block pointers are invalid" %t define i32 @main() { %foo = call i8* %llvm.stacksave() diff --git a/test/Verifier/2006-12-12-IntrinsicDefine.ll b/test/Verifier/2006-12-12-IntrinsicDefine.ll index 8d09b51206..6e7468c1d9 100644 --- a/test/Verifier/2006-12-12-IntrinsicDefine.ll +++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {llvm intrinsics cannot be defined} +; RUN: not llvm-as < %s 2>&1 | grep "llvm intrinsics cannot be defined" ; PR1047 define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) { diff --git a/test/Verifier/2008-03-01-AllocaSized.ll b/test/Verifier/2008-03-01-AllocaSized.ll index 079a75d792..51258bef76 100644 --- a/test/Verifier/2008-03-01-AllocaSized.ll +++ b/test/Verifier/2008-03-01-AllocaSized.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null |& grep {Cannot allocate unsized type} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Cannot allocate unsized type" ; PR2113 define void @test() { diff --git a/test/Verifier/2008-08-22-MemCpyAlignment.ll b/test/Verifier/2008-08-22-MemCpyAlignment.ll index 125325c04e..c6d5afd51c 100644 --- a/test/Verifier/2008-08-22-MemCpyAlignment.ll +++ b/test/Verifier/2008-08-22-MemCpyAlignment.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null |& grep {alignment argument of memory intrinsics must be a constant int} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "alignment argument of memory intrinsics must be a constant int" ; PR2318 define void @x(i8* %a, i8* %src, i64 %len, i32 %align) nounwind { diff --git a/test/Verifier/2008-11-15-RetVoid.ll b/test/Verifier/2008-11-15-RetVoid.ll index 
aaef7030c5..42503fabbe 100644 --- a/test/Verifier/2008-11-15-RetVoid.ll +++ b/test/Verifier/2008-11-15-RetVoid.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'void'} +; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'void'" define void @foo() { ret i32 0 diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll index bf5563d9c0..3136c61514 100644 --- a/test/Verifier/2010-08-07-PointerIntrinsic.ll +++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll @@ -1,5 +1,5 @@ ; RUN: not llvm-as < %s 2> %t -; RUN: grep {Broken module} %t +; RUN: grep "Broken module" %t ; PR7316 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" diff --git a/test/Verifier/AmbiguousPhi.ll b/test/Verifier/AmbiguousPhi.ll index 9a72530187..f31bc107ac 100644 --- a/test/Verifier/AmbiguousPhi.ll +++ b/test/Verifier/AmbiguousPhi.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {multiple entries for the same basic block} +; RUN: not llvm-as < %s 2>&1 | grep "multiple entries for the same basic block" diff --git a/test/Verifier/PhiGrouping.ll b/test/Verifier/PhiGrouping.ll index dc529dced3..7b42fd28e3 100644 --- a/test/Verifier/PhiGrouping.ll +++ b/test/Verifier/PhiGrouping.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& grep {PHI nodes not grouped at top} +; RUN: not llvm-as < %s 2>&1 | grep "PHI nodes not grouped at top" diff --git a/test/Verifier/SelfReferential.ll b/test/Verifier/SelfReferential.ll index 70154b77a8..c24c0ebba3 100644 --- a/test/Verifier/SelfReferential.ll +++ b/test/Verifier/SelfReferential.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null |& grep {Only PHI nodes may reference their own value} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Only PHI nodes may reference their own value" ; Test that self referential instructions are not allowed diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll index 
fc5ef1ce13..a52e796b2b 100644 --- a/test/Verifier/aliasing-chain.ll +++ b/test/Verifier/aliasing-chain.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null |& grep {Aliasing chain should end with function or global variable} +; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Aliasing chain should end with function or global variable" ; Test that alising chain does not create a cycle diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll index 48cd061d32..66c5396443 100644 --- a/test/Verifier/cttz-undef-arg.ll +++ b/test/Verifier/cttz-undef-arg.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s declare i32 @llvm.ctlz.i32(i32, i1) declare i32 @llvm.cttz.i32(i32, i1) diff --git a/test/Verifier/dominates.ll b/test/Verifier/dominates.ll index 50bfa616f0..17e2c33999 100644 --- a/test/Verifier/dominates.ll +++ b/test/Verifier/dominates.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s define i32 @f1(i32 %x) { %y = add i32 %z, 1 diff --git a/test/Verifier/fpmath.ll b/test/Verifier/fpmath.ll index b764a63f0a..7002c5c825 100644 --- a/test/Verifier/fpmath.ll +++ b/test/Verifier/fpmath.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s |& FileCheck %s +; RUN: not llvm-as < %s 2>&1 | FileCheck %s define void @fpmath1(i32 %i, float %f, <2 x float> %g) { %s = add i32 %i, %i, !fpmath !0 diff --git a/test/Verifier/invoke-1.ll b/test/Verifier/invoke-1.ll deleted file mode 100644 index 427abe02ce..0000000000 --- a/test/Verifier/invoke-1.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: not llvm-as < %s |& grep {not verify as correct} -; PR1042 - -define i32 @foo() { - %A = invoke i32 @foo( ) - to label %L unwind label %L ; <i32> [#uses=1] -L: ; preds = %0, %0 - ret i32 %A -} - diff --git a/test/Verifier/invoke-2.ll b/test/Verifier/invoke-2.ll deleted file mode 100644 index 0145935a1a..0000000000 --- 
a/test/Verifier/invoke-2.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: not llvm-as %s |& grep {not verify as correct} -; PR1042 - -define i32 @foo() { - br i1 false, label %L1, label %L2 -L1: ; preds = %0 - %A = invoke i32 @foo( ) - to label %L unwind label %L ; <i32> [#uses=1] -L2: ; preds = %0 - br label %L -L: ; preds = %L2, %L1, %L1 - ret i32 %A -} - diff --git a/test/Verifier/invoke.ll b/test/Verifier/invoke.ll new file mode 100644 index 0000000000..06f40f0052 --- /dev/null +++ b/test/Verifier/invoke.ll @@ -0,0 +1,65 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +; PR1042 +define i32 @foo() { +; CHECK: The unwind destination does not have a landingpad instruction + %A = invoke i32 @foo( ) + to label %L unwind label %L ; <i32> [#uses=1] +L: ; preds = %0, %0 + ret i32 %A +} + +; PR1042 +define i32 @bar() { + br i1 false, label %L1, label %L2 +L1: ; preds = %0 + %A = invoke i32 @bar( ) + to label %L unwind label %L ; <i32> [#uses=1] +L2: ; preds = %0 + br label %L +L: ; preds = %L2, %L1, %L1 +; CHECK: The unwind destination does not have a landingpad instruction +; CHECK: Instruction does not dominate all uses + ret i32 %A +} + + +declare i32 @__gxx_personality_v0(...) 
+declare void @llvm.donothing() +declare void @llvm.trap() +declare i8 @llvm.expect.i8(i8,i8) +declare i32 @fn(i8 (i8, i8)*) + +define void @f1() { +entry: +; OK + invoke void @llvm.donothing() + to label %cont unwind label %cont + +cont: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret void +} + +define i8 @f2() { +entry: +; CHECK: Cannot invoke an intrinsinc other than donothing + invoke void @llvm.trap() + to label %cont unwind label %lpad + +cont: + ret i8 3 + +lpad: + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + filter [0 x i8*] zeroinitializer + ret i8 2 +} + +define i32 @f3() { +entry: +; CHECK: Cannot take the address of an intrinsic + %call = call i32 @fn(i8 (i8, i8)* @llvm.expect.i8) + ret i32 %call +} diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll index 7a317fca8f..b6a75d13bb 100644 --- a/test/Verifier/range-1.ll +++ b/test/Verifier/range-1.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s define void @f1(i8* %x) { entry: diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data index b306bdb719..028f41bb55 100644 --- a/test/YAMLParser/spec-05-02-utf8.data +++ b/test/YAMLParser/spec-05-02-utf8.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # Invalid use of BOM # inside a diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data index 6788f0bfc3..bab2c1b89c 100644 --- a/test/YAMLParser/spec-05-10.data +++ b/test/YAMLParser/spec-05-10.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s commercial-at: @text grave-accent: `text diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data index 
7dadff76f8..eedfc08cf3 100644 --- a/test/YAMLParser/spec-05-12.data +++ b/test/YAMLParser/spec-05-12.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # We don't currently reject tabs as indentation. # XFAIL: * diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data index cd8421ad27..27dbd8396e 100644 --- a/test/YAMLParser/spec-05-15.data +++ b/test/YAMLParser/spec-05-15.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s Bad escapes: "\c diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data index 7ca9483016..c4a5299562 100644 --- a/test/YAMLParser/spec-07-03.data +++ b/test/YAMLParser/spec-07-03.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s %YAML 1.1 %YAML 1.1 diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data index 279b54afa1..f7cff3a839 100644 --- a/test/YAMLParser/spec-07-05.data +++ b/test/YAMLParser/spec-07-05.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # We don't currently parse TAG directives. # XFAIL: * diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data index f13538bc87..73c493d763 100644 --- a/test/YAMLParser/spec-08-04.data +++ b/test/YAMLParser/spec-08-04.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # We don't currently look at the content of literal tags. 
# XFAIL: * diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data index a811bfdefe..9844f53a4e 100644 --- a/test/YAMLParser/spec-08-06.data +++ b/test/YAMLParser/spec-08-06.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # We don't currently validate tags. # XFAIL: * diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data index f69037820e..9d8a58ca33 100644 --- a/test/YAMLParser/spec-09-02.data +++ b/test/YAMLParser/spec-09-02.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # Indent trimming is not yet implemented. # XFAIL: * diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data index 890f6bf2e7..a83fcd45e2 100644 --- a/test/YAMLParser/spec-09-14.data +++ b/test/YAMLParser/spec-09-14.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # Not quite sure why this doesn't fail. # XFAIL: * diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data index 2bcc28337f..6eb7917a97 100644 --- a/test/YAMLParser/spec-09-21.data +++ b/test/YAMLParser/spec-09-21.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s - | diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data index 5b981e9833..53faeb9e00 100644 --- a/test/YAMLParser/spec-10-08.data +++ b/test/YAMLParser/spec-10-08.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s |& FileCheck %s +# RUN: yaml-bench -canonical %s 2>&1 | FileCheck %s # # This fails because even without a key token, some contexts (in this case flow # maps) allow implicit null keys, which mix with this in weird ways. 
diff --git a/test/lit.cfg b/test/lit.cfg index 2383d8fe79..575c127333 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -10,7 +10,9 @@ import re config.name = 'LLVM' # testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.TclTest() +execute_external = (sys.platform in ['win32'] + or lit.getBashPath() not in [None, ""]) +config.test_format = lit.formats.ShTest(execute_external) # To ignore test output on stderr so it doesn't trigger failures uncomment this: #config.test_format = lit.formats.TclTest(ignoreStdErr=True) @@ -19,6 +21,11 @@ config.test_format = lit.formats.TclTest() # set by on_clone(). config.suffixes = [] +# excludes: A list of directories to exclude from the testsuite. The 'Inputs' +# subdirectories contain auxiliary inputs for various tests in their parent +# directories. +config.excludes = ['Inputs'] + # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) @@ -132,18 +139,6 @@ if config.test_exec_root is None: ### -# Load site data from DejaGNU's site.exp. -import re -site_exp = {} -# FIXME: Implement lit.site.cfg. -for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')): - m = re.match('set ([^ ]+) "(.*)"', line) - if m: - site_exp[m.group(1)] = m.group(2) - -# Provide target_triple for use in XFAIL and XTARGET. -config.target_triple = site_exp['target_triplet'] - # When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the # triple so we can check it with XFAIL and XTARGET. config.target_triple += lit.valgrindTriple @@ -164,9 +159,10 @@ if jit_impl_cfg == 'mcjit': else: config.substitutions.append( ('%lli', 'lli') ) -# Add substitutions. -for sub in ['link', 'shlibext', 'ocamlopt', 'llvmshlibdir']: - config.substitutions.append(('%' + sub, site_exp[sub])) +# Add site-specific substitutions. 
+config.substitutions.append( ('%ocamlopt', config.ocamlopt_executable) ) +config.substitutions.append( ('%llvmshlibdir', config.llvm_shlib_dir) ) +config.substitutions.append( ('%shlibext', config.llvm_shlib_ext) ) # For each occurrence of an llvm tool name as its own word, replace it # with the full path to the build directory holding that tool. This diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index e069dd130b..178b22f10f 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -1,10 +1,14 @@ ## Autogenerated by LLVM/Clang configuration. # Do not edit! +config.target_triple = "@TARGET_TRIPLE@" config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.llvm_shlib_dir = "@SHLIBDIR@" +config.llvm_shlib_ext = "@SHLIBEXT@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.python_executable = "@PYTHON_EXECUTABLE@" +config.ocamlopt_executable = "@OCAMLOPT@" config.enable_shared = @ENABLE_SHARED@ config.enable_assertions = @ENABLE_ASSERTIONS@ config.targets_to_build = "@TARGETS_TO_BUILD@" diff --git a/test/site.exp.in b/test/site.exp.in deleted file mode 100644 index cfb2eac550..0000000000 --- a/test/site.exp.in +++ /dev/null @@ -1,16 +0,0 @@ -## Autogenerated by LLVM configuration. -# Do not edit! 
-set target_triplet "@TARGET_TRIPLE@" -set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@" -set llvmshlibdir "@SHLIBDIR@" -set llvm_bindings "@LLVM_BINDINGS@" -set srcroot "@LLVM_SOURCE_DIR@" -set objroot "@LLVM_BINARY_DIR@" -set srcdir "@LLVM_SOURCE_DIR@" -set objdir "@LLVM_BINARY_DIR@" -set link "@TEST_LINK_CMD@" -set shlibext "@SHLIBEXT@" -set ocamlopt "@OCAMLOPT@" -set valgrind "@VALGRIND@" -set grep "@GREP@" -set gas "@AS@" diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index ed48acc29a..aa52def1bc 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -18,7 +18,8 @@ #include "llvm/PassManager.h" #include "llvm/Pass.h" #include "llvm/ADT/Triple.h" -#include "llvm/Support/DataStream.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/DataStream.h" // @LOCALMOD #include "llvm/Support/IRReader.h" #include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" @@ -38,6 +39,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include <memory> @@ -191,11 +193,6 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim", cl::init(false)); static cl::opt<bool> -DisableExcessPrecision("disable-excess-fp-precision", - cl::desc("Disable optimizations that may increase FP precision"), - cl::init(false)); - -static cl::opt<bool> EnableUnsafeFPMath("enable-unsafe-fp-math", cl::desc("Enable optimizations that may decrease FP precision"), cl::init(false)); @@ -234,12 +231,30 @@ FloatABIForCalls("float-abi", "Hard float ABI (uses FP registers)"), clEnumValEnd)); +static cl::opt<llvm::FPOpFusion::FPOpFusionMode> +FuseFPOps("fp-contract", + cl::desc("Enable aggresive formation of fused FP ops"), + cl::init(FPOpFusion::Standard), + cl::values( + clEnumValN(FPOpFusion::Fast, "fast", + "Fuse FP ops whenever profitable"), + clEnumValN(FPOpFusion::Standard, "on", + "Only fuse 
'blessed' FP ops."), + clEnumValN(FPOpFusion::Strict, "off", + "Only fuse FP ops when the result won't be effected."), + clEnumValEnd)); + static cl::opt<bool> DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), cl::init(false)); static cl::opt<bool> +DisableSimplifyLibCalls("disable-simplify-libcalls", + cl::desc("Disable simplify-libcalls"), + cl::init(false)); + +static cl::opt<bool> EnableGuaranteedTailCallOpt("tailcallopt", cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), cl::init(false)); @@ -259,11 +274,6 @@ EnableRealignStack("realign-stack", cl::desc("Realign stack if needed"), cl::init(true)); -static cl::opt<bool> -DisableSwitchTables(cl::Hidden, "disable-jump-tables", - cl::desc("Do not generate jump tables."), - cl::init(false)); - static cl::opt<std::string> TrapFuncName("trap-func", cl::Hidden, cl::desc("Emit a call to trap function rather than a trap instruction"), @@ -279,6 +289,20 @@ SegmentedStacks("segmented-stacks", cl::desc("Use segmented stacks if possible."), cl::init(false)); +static cl::opt<bool> +UseInitArray("use-init-array", + cl::desc("Use .init_array instead of .ctors."), + cl::init(false)); + +static cl::opt<std::string> StopAfter("stop-after", + cl::desc("Stop compilation after a specific pass"), + cl::value_desc("pass-name"), + cl::init("")); +static cl::opt<std::string> StartAfter("start-after", + cl::desc("Resume compilation after a specific pass"), + cl::value_desc("pass-name"), + cl::init("")); + // @LOCALMOD-BEGIN // Using bitcode streaming has a couple of ramifications. Primarily it means // that the module in the file will be compiled one function at a time rather @@ -447,6 +471,15 @@ int llc_main(int argc, char **argv) { InitializeAllAsmPrinters(); InitializeAllAsmParsers(); + // Initialize codegen and IR passes used by llc so that the -print-after, + // -print-before, and -stop-after options work. 
+ PassRegistry *Registry = PassRegistry::getPassRegistry(); + initializeCore(*Registry); + initializeCodeGen(*Registry); + initializeLoopStrengthReducePass(*Registry); + initializeLowerIntrinsicsPass(*Registry); + initializeUnreachableBlockElimPass(*Registry); + // Register the target printer for --version. cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); @@ -455,7 +488,14 @@ int llc_main(int argc, char **argv) { // Load the module to be compiled... SMDiagnostic Err; std::auto_ptr<Module> M; + Module *mod = 0; + Triple TheTriple; + + bool SkipModule = MCPU == "help" || + (!MAttrs.empty() && MAttrs.front() == "help"); + // If user just wants to list available options, skip module loading + if (!SkipModule) { // @LOCALMOD-BEGIN #if defined(__native_client__) && defined(NACL_SRPC) if (LazyBitcode) { @@ -490,36 +530,38 @@ int llc_main(int argc, char **argv) { #endif // @LOCALMOD-END - if (M.get() == 0) { - Err.print(argv[0], errs()); - return 1; - } - Module &mod = *M.get(); + mod = M.get(); + if (mod == 0) { + Err.print(argv[0], errs()); + return 1; + } // @LOCALMOD-BEGIN #if defined(__native_client__) && defined(NACL_SRPC) - RecordMetadataForSrpc(mod); + RecordMetadataForSrpc(*mod); // To determine if we should compile PIC or not, we needed to load at // least the metadata. Since we've already constructed the commandline, // we have to hack this in after commandline processing. - if (mod.getOutputFormat() == Module::SharedOutputFormat) { + if (mod->getOutputFormat() == Module::SharedOutputFormat) { RelocModel = Reloc::PIC_; } // Also set PIC_ for dynamic executables: // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2351 - if (mod.lib_size() > 0) { + if (mod->lib_size() > 0) { RelocModel = Reloc::PIC_; } #endif // defined(__native_client__) && defined(NACL_SRPC) // @LOCALMOD-END - // If we are supposed to override the target triple, do so now. 
- if (!TargetTriple.empty()) - mod.setTargetTriple(Triple::normalize(TargetTriple)); + // If we are supposed to override the target triple, do so now. + if (!TargetTriple.empty()) + mod->setTargetTriple(Triple::normalize(TargetTriple)); + TheTriple = Triple(mod->getTargetTriple()); + } else { + TheTriple = Triple(Triple::normalize(TargetTriple)); + } - // Figure out the target triple. - Triple TheTriple(mod.getTargetTriple()); if (TheTriple.getTriple().empty()) TheTriple.setTriple(sys::getDefaultTargetTriple()); @@ -562,7 +604,7 @@ int llc_main(int argc, char **argv) { Options.LessPreciseFPMADOption = EnableFPMAD; Options.NoFramePointerElim = DisableFPElim; Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf; - Options.NoExcessFPPrecision = DisableExcessPrecision; + Options.AllowFPOpFusion = FuseFPOps; Options.UnsafeFPMath = EnableUnsafeFPMath; Options.NoInfsFPMath = EnableNoInfsFPMath; Options.NoNaNsFPMath = EnableNoNaNsFPMath; @@ -576,16 +618,17 @@ int llc_main(int argc, char **argv) { Options.DisableTailCalls = DisableTailCalls; Options.StackAlignmentOverride = OverrideStackAlignment; Options.RealignStack = EnableRealignStack; - Options.DisableJumpTables = DisableSwitchTables; Options.TrapFuncName = TrapFuncName; Options.PositionIndependentExecutable = EnablePIE; Options.EnableSegmentedStacks = SegmentedStacks; + Options.UseInitArray = UseInitArray; std::auto_ptr<TargetMachine> target(TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, Options, RelocModel, CMModel, OLvl)); assert(target.get() && "Could not allocate target machine!"); + assert(mod && "Should have exited after outputting help!"); TargetMachine &Target = *target.get(); if (DisableDotLoc) @@ -606,26 +649,32 @@ int llc_main(int argc, char **argv) { Target.setMCUseLoc(false); #if !defined(NACL_SRPC) - // Figure out where we are going to send the output... + // Figure out where we are going to send the output. 
OwningPtr<tool_output_file> Out (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0])); if (!Out) return 1; #endif - + // Build up all of the passes that we want to do to the module. // @LOCALMOD-BEGIN OwningPtr<PassManagerBase> PM; if (LazyBitcode || ReduceMemoryFootprint) - PM.reset(new FunctionPassManager(&mod)); + PM.reset(new FunctionPassManager(mod)); else PM.reset(new PassManager()); // @LOCALMOD-END + // Add an appropriate TargetLibraryInfo pass for the module's triple. + TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple); + if (DisableSimplifyLibCalls) + TLI->disableAllFunctions(); + PM->add(TLI); + // Add the target data from the target machine, if it exists, or the module. if (const TargetData *TD = Target.getTargetData()) PM->add(new TargetData(*TD)); else - PM->add(new TargetData(&mod)); + PM->add(new TargetData(mod)); // Override default to generate verbose assembly. Target.setAsmVerbosityDefault(true); @@ -655,7 +704,7 @@ int llc_main(int argc, char **argv) { if (LazyBitcode || ReduceMemoryFootprint) { FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get()); P->doInitialization(); - for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I) { + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { P->run(*I); if (ReduceMemoryFootprint) { I->Dematerialize(); @@ -663,7 +712,7 @@ int llc_main(int argc, char **argv) { } P->doFinalization(); } else { - static_cast<PassManager*>(PM.get())->run(mod); + static_cast<PassManager*>(PM.get())->run(*mod); } FOS.flush(); ROS.flush(); @@ -674,8 +723,29 @@ int llc_main(int argc, char **argv) { { formatted_raw_ostream FOS(Out->os()); + AnalysisID StartAfterID = 0; + AnalysisID StopAfterID = 0; + const PassRegistry *PR = PassRegistry::getPassRegistry(); + if (!StartAfter.empty()) { + const PassInfo *PI = PR->getPassInfo(StartAfter); + if (!PI) { + errs() << argv[0] << ": start-after pass is not registered.\n"; + return 1; + } + StartAfterID = PI->getTypeInfo(); 
+ } + if (!StopAfter.empty()) { + const PassInfo *PI = PR->getPassInfo(StopAfter); + if (!PI) { + errs() << argv[0] << ": stop-after pass is not registered.\n"; + return 1; + } + StopAfterID = PI->getTypeInfo(); + } + // Ask the target to add backend passes as necessary. - if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify)) { + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify, + StartAfterID, StopAfterID)) { errs() << argv[0] << ": target does not support generation of this" << " file type!\n"; return 1; @@ -687,7 +757,7 @@ int llc_main(int argc, char **argv) { if (LazyBitcode || ReduceMemoryFootprint) { FunctionPassManager *P = static_cast<FunctionPassManager*>(PM.get()); P->doInitialization(); - for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I) { + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { P->run(*I); if (ReduceMemoryFootprint) { I->Dematerialize(); @@ -695,7 +765,7 @@ int llc_main(int argc, char **argv) { } P->doFinalization(); } else { - static_cast<PassManager*>(PM.get())->run(mod); + static_cast<PassManager*>(PM.get())->run(*mod); } } diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index a5f68a6418..75ceda61ad 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -17,11 +17,11 @@ //===----------------------------------------------------------------------===// #include "llvm/LLVMContext.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/IntrinsicInst.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataStream.h" @@ -100,7 +100,6 @@ public: DIVariable Var(DDI->getVariable()); if (!Padded) { OS.PadToColumn(50); - Padded = true; OS << ";"; } OS << " [debug variable = " << Var.getName() << "]"; @@ -109,7 +108,6 @@ public: DIVariable Var(DVI->getVariable()); if 
(!Padded) { OS.PadToColumn(50); - Padded = true; OS << ";"; } OS << " [debug variable = " << Var.getName() << "]"; diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index ca0493de5d..b6536fa1d7 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -39,6 +39,11 @@ static cl::opt<unsigned long long> Address("address", cl::init(-1ULL), cl::desc("Print line information for a given address")); +static cl::opt<bool> +PrintFunctions("functions", cl::init(false), + cl::desc("Print function names as well as line information " + "for a given address")); + static void DumpInput(const StringRef &Filename) { OwningPtr<MemoryBuffer> Buff; @@ -92,7 +97,13 @@ static void DumpInput(const StringRef &Filename) { dictx->dump(outs()); } else { // Print line info for the specified address. - DILineInfo dli = dictx->getLineInfoForAddress(Address); + int spec_flags = DILineInfoSpecifier::FileLineInfo; + if (PrintFunctions) + spec_flags |= DILineInfoSpecifier::FunctionName; + DILineInfo dli = dictx->getLineInfoForAddress(Address, spec_flags); + if (PrintFunctions) + outs() << (dli.getFunctionName() ? dli.getFunctionName() : "<unknown>") + << "\n"; outs() << (dli.getFileName() ? 
dli.getFileName() : "<unknown>") << ':' << dli.getLine() << ':' << dli.getColumn() << '\n'; } diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 5e175a004d..ed4f2b6432 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -44,7 +44,7 @@ using namespace object; static cl::opt<bool> CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" - "write it to a graphviz file (MachO-only)")); + " write it to a graphviz file (MachO-only)")); static cl::opt<bool> UseDbg("g", cl::desc("Print line information from debug info if available")); diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp index fb05a589e8..31252dd7f7 100644 --- a/tools/llvm-stress/llvm-stress.cpp +++ b/tools/llvm-stress/llvm-stress.cpp @@ -82,6 +82,12 @@ public: uint64_t Val = Rand32(); return Val | (uint64_t(Rand32()) << 32); } + + /// Rand operator for STL algorithms. + ptrdiff_t operator()(ptrdiff_t y) { + return Rand64() % y; + } + private: unsigned Seed; }; @@ -599,15 +605,13 @@ struct CmpModifier: public Modifier { } }; -void FillFunction(Function *F) { +void FillFunction(Function *F, Random &R) { // Create a legal entry block. BasicBlock *BB = BasicBlock::Create(F->getContext(), "BB", F); ReturnInst::Create(F->getContext(), BB); // Create the value table. Modifier::PieceTable PT; - // Pick an initial seed value - Random R(SeedCL); // Consider arguments as legal values. for (Function::arg_iterator it = F->arg_begin(), e = F->arg_end(); @@ -648,15 +652,17 @@ void FillFunction(Function *F) { SM->ActN(5); // Throw in a few stores. 
} -void IntroduceControlFlow(Function *F) { - std::set<Instruction*> BoolInst; +void IntroduceControlFlow(Function *F, Random &R) { + std::vector<Instruction*> BoolInst; for (BasicBlock::iterator it = F->begin()->begin(), e = F->begin()->end(); it != e; ++it) { if (it->getType() == IntegerType::getInt1Ty(F->getContext())) - BoolInst.insert(it); + BoolInst.push_back(it); } - for (std::set<Instruction*>::iterator it = BoolInst.begin(), + std::random_shuffle(BoolInst.begin(), BoolInst.end(), R); + + for (std::vector<Instruction*>::iterator it = BoolInst.begin(), e = BoolInst.end(); it != e; ++it) { Instruction *Instr = *it; BasicBlock *Curr = Instr->getParent(); @@ -678,8 +684,13 @@ int main(int argc, char **argv) { std::auto_ptr<Module> M(new Module("/tmp/autogen.bc", getGlobalContext())); Function *F = GenEmptyFunction(M.get()); - FillFunction(F); - IntroduceControlFlow(F); + + // Pick an initial seed value + Random R(SeedCL); + // Generate lots of random instructions inside a single basic block. + FillFunction(F, R); + // Break the basic block into many loops. + IntroduceControlFlow(F, R); // Figure out what stream we are supposed to write to... 
OwningPtr<tool_output_file> Out; diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index 2c841dbc52..5d3d677bf8 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -646,7 +646,7 @@ namespace { markGlobal(*Symbol); } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size , unsigned ByteAlignment) { + uint64_t Size , unsigned ByteAlignment) { markDefined(*Symbol); } virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index d9758b35ff..4ada7d1e76 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/LLVMContext.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CallGraphSCCPass.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/RegionPass.h" diff --git a/tools/pso-stub/pso-stub.cpp b/tools/pso-stub/pso-stub.cpp index 45d5347e22..1fdc868499 100644 --- a/tools/pso-stub/pso-stub.cpp +++ b/tools/pso-stub/pso-stub.cpp @@ -145,7 +145,9 @@ AddGlobalVariable(Module *M, GlobalVariable *GV = new GlobalVariable(*M, Ty, /*isConstant=*/ false, Linkage, /*Initializer=*/ InitVal, - Twine(Name), /*InsertBefore=*/ NULL, isTLS, + Twine(Name), /*InsertBefore=*/ NULL, + isTLS ? 
GlobalVariable::GeneralDynamicTLSModel : + GlobalVariable::NotThreadLocal, /*AddressSpace=*/ 0); AddUsedGlobal(GV); } diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp index 62aadf6f0e..d836036aea 100644 --- a/unittests/ADT/BitVectorTest.cpp +++ b/unittests/ADT/BitVectorTest.cpp @@ -11,14 +11,23 @@ #ifndef __ppc__ #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "gtest/gtest.h" using namespace llvm; namespace { -TEST(BitVectorTest, TrivialOperation) { - BitVector Vec; +// Test fixture +template <typename T> +class BitVectorTest : public ::testing::Test { }; + +// Test both BitVector and SmallBitVector with the same suite of tests. +typedef ::testing::Types<BitVector, SmallBitVector> BitVectorTestTypes; +TYPED_TEST_CASE(BitVectorTest, BitVectorTestTypes); + +TYPED_TEST(BitVectorTest, TrivialOperation) { + TypeParam Vec; EXPECT_EQ(0U, Vec.count()); EXPECT_EQ(0U, Vec.size()); EXPECT_FALSE(Vec.any()); @@ -42,7 +51,7 @@ TEST(BitVectorTest, TrivialOperation) { EXPECT_FALSE(Vec.none()); EXPECT_FALSE(Vec.empty()); - BitVector Inv = Vec; + TypeParam Inv = Vec; Inv.flip(); EXPECT_EQ(6U, Inv.count()); EXPECT_EQ(11U, Inv.size()); @@ -77,8 +86,8 @@ TEST(BitVectorTest, TrivialOperation) { EXPECT_FALSE(Vec[56]); Vec.resize(61, false); - BitVector Copy = Vec; - BitVector Alt(3, false); + TypeParam Copy = Vec; + TypeParam Alt(3, false); Alt.resize(6, true); std::swap(Alt, Vec); EXPECT_TRUE(Copy == Alt); @@ -132,7 +141,7 @@ TEST(BitVectorTest, TrivialOperation) { EXPECT_TRUE(Vec.none()); EXPECT_FALSE(Vec.empty()); - Inv = BitVector().flip(); + Inv = TypeParam().flip(); EXPECT_EQ(0U, Inv.count()); EXPECT_EQ(0U, Inv.size()); EXPECT_FALSE(Inv.any()); @@ -149,13 +158,13 @@ TEST(BitVectorTest, TrivialOperation) { EXPECT_TRUE(Vec.empty()); } -TEST(BitVectorTest, CompoundAssignment) { - BitVector A; +TYPED_TEST(BitVectorTest, CompoundAssignment) { + TypeParam A; A.resize(10); A.set(4); A.set(7); - BitVector B; + TypeParam B; 
B.resize(50); B.set(5); B.set(18); @@ -188,8 +197,8 @@ TEST(BitVectorTest, CompoundAssignment) { EXPECT_EQ(100U, A.size()); } -TEST(BitVectorTest, ProxyIndex) { - BitVector Vec(3); +TYPED_TEST(BitVectorTest, ProxyIndex) { + TypeParam Vec(3); EXPECT_TRUE(Vec.none()); Vec[0] = Vec[1] = Vec[2] = true; EXPECT_EQ(Vec.size(), Vec.count()); @@ -197,8 +206,8 @@ TEST(BitVectorTest, ProxyIndex) { EXPECT_TRUE(Vec.none()); } -TEST(BitVectorTest, PortableBitMask) { - BitVector A; +TYPED_TEST(BitVectorTest, PortableBitMask) { + TypeParam A; const uint32_t Mask1[] = { 0x80000000, 6, 5 }; A.resize(10); @@ -244,9 +253,9 @@ TEST(BitVectorTest, PortableBitMask) { EXPECT_EQ(64-4u, A.count()); } -TEST(BitVectorTest, BinOps) { - BitVector A; - BitVector B; +TYPED_TEST(BitVectorTest, BinOps) { + TypeParam A; + TypeParam B; A.resize(65); EXPECT_FALSE(A.anyCommon(B)); diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt new file mode 100644 index 0000000000..690ff78414 --- /dev/null +++ b/unittests/ADT/CMakeLists.txt @@ -0,0 +1,32 @@ +set(LLVM_LINK_COMPONENTS + Support + ) + +add_llvm_unittest(ADTTests + APFloatTest.cpp + APIntTest.cpp + BitVectorTest.cpp + DAGDeltaAlgorithmTest.cpp + DeltaAlgorithmTest.cpp + DenseMapTest.cpp + DenseSetTest.cpp + FoldingSet.cpp + HashingTest.cpp + ilistTest.cpp + ImmutableSetTest.cpp + IntEqClassesTest.cpp + IntervalMapTest.cpp + IntrusiveRefCntPtrTest.cpp + PackedVectorTest.cpp + SCCIteratorTest.cpp + SmallPtrSetTest.cpp + SmallStringTest.cpp + SmallVectorTest.cpp + SparseBitVectorTest.cpp + SparseSetTest.cpp + StringMapTest.cpp + StringRefTest.cpp + TripleTest.cpp + TwineTest.cpp + VariadicFunctionTest.cpp + ) diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp index e0ee7782cc..75e7006434 100644 --- a/unittests/ADT/DenseMapTest.cpp +++ b/unittests/ADT/DenseMapTest.cpp @@ -9,170 +9,283 @@ #include "gtest/gtest.h" #include "llvm/ADT/DenseMap.h" +#include <map> +#include <set> using namespace llvm; namespace { 
-// Test fixture -class DenseMapTest : public testing::Test { -protected: - DenseMap<uint32_t, uint32_t> uintMap; - DenseMap<uint32_t *, uint32_t *> uintPtrMap; - uint32_t dummyInt; +uint32_t getTestKey(int i, uint32_t *) { return i; } +uint32_t getTestValue(int i, uint32_t *) { return 42 + i; } + +uint32_t *getTestKey(int i, uint32_t **) { + static uint32_t dummy_arr1[8192]; + assert(i < 8192 && "Only support 8192 dummy keys."); + return &dummy_arr1[i]; +} +uint32_t *getTestValue(int i, uint32_t **) { + static uint32_t dummy_arr1[8192]; + assert(i < 8192 && "Only support 8192 dummy keys."); + return &dummy_arr1[i]; +} + +/// \brief A test class that tries to check that construction and destruction +/// occur correctly. +class CtorTester { + static std::set<CtorTester *> Constructed; + int Value; + +public: + explicit CtorTester(int Value = 0) : Value(Value) { + EXPECT_TRUE(Constructed.insert(this).second); + } + CtorTester(uint32_t Value) : Value(Value) { + EXPECT_TRUE(Constructed.insert(this).second); + } + CtorTester(const CtorTester &Arg) : Value(Arg.Value) { + EXPECT_TRUE(Constructed.insert(this).second); + } + ~CtorTester() { + EXPECT_EQ(1u, Constructed.erase(this)); + } + operator uint32_t() const { return Value; } + + int getValue() const { return Value; } + bool operator==(const CtorTester &RHS) const { return Value == RHS.Value; } }; -// Empty map tests -TEST_F(DenseMapTest, EmptyIntMapTest) { - // Size tests - EXPECT_EQ(0u, uintMap.size()); - EXPECT_TRUE(uintMap.empty()); +std::set<CtorTester *> CtorTester::Constructed; - // Iterator tests - EXPECT_TRUE(uintMap.begin() == uintMap.end()); +struct CtorTesterMapInfo { + static inline CtorTester getEmptyKey() { return CtorTester(-1); } + static inline CtorTester getTombstoneKey() { return CtorTester(-2); } + static unsigned getHashValue(const CtorTester &Val) { + return Val.getValue() * 37u; + } + static bool isEqual(const CtorTester &LHS, const CtorTester &RHS) { + return LHS == RHS; + } +}; - // Lookup 
tests - EXPECT_FALSE(uintMap.count(0u)); - EXPECT_TRUE(uintMap.find(0u) == uintMap.end()); - EXPECT_EQ(0u, uintMap.lookup(0u)); -} +CtorTester getTestKey(int i, CtorTester *) { return CtorTester(i); } +CtorTester getTestValue(int i, CtorTester *) { return CtorTester(42 + i); } + +// Test fixture, with helper functions implemented by forwarding to global +// function overloads selected by component types of the type parameter. This +// allows all of the map implementations to be tested with shared +// implementations of helper routines. +template <typename T> +class DenseMapTest : public ::testing::Test { +protected: + T Map; + + static typename T::key_type *const dummy_key_ptr; + static typename T::mapped_type *const dummy_value_ptr; + + typename T::key_type getKey(int i = 0) { + return getTestKey(i, dummy_key_ptr); + } + typename T::mapped_type getValue(int i = 0) { + return getTestValue(i, dummy_value_ptr); + } +}; -// Empty map tests for pointer map -TEST_F(DenseMapTest, EmptyPtrMapTest) { +template <typename T> +typename T::key_type *const DenseMapTest<T>::dummy_key_ptr = 0; +template <typename T> +typename T::mapped_type *const DenseMapTest<T>::dummy_value_ptr = 0; + +// Register these types for testing. 
+typedef ::testing::Types<DenseMap<uint32_t, uint32_t>, + DenseMap<uint32_t *, uint32_t *>, + DenseMap<CtorTester, CtorTester, CtorTesterMapInfo>, + SmallDenseMap<uint32_t, uint32_t>, + SmallDenseMap<uint32_t *, uint32_t *>, + SmallDenseMap<CtorTester, CtorTester, 4, + CtorTesterMapInfo> + > DenseMapTestTypes; +TYPED_TEST_CASE(DenseMapTest, DenseMapTestTypes); + +// Empty map tests +TYPED_TEST(DenseMapTest, EmptyIntMapTest) { // Size tests - EXPECT_EQ(0u, uintPtrMap.size()); - EXPECT_TRUE(uintPtrMap.empty()); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); // Iterator tests - EXPECT_TRUE(uintPtrMap.begin() == uintPtrMap.end()); + EXPECT_TRUE(this->Map.begin() == this->Map.end()); // Lookup tests - EXPECT_FALSE(uintPtrMap.count(&dummyInt)); - EXPECT_TRUE(uintPtrMap.find(&dummyInt) == uintPtrMap.begin()); - EXPECT_EQ(0, uintPtrMap.lookup(&dummyInt)); + EXPECT_FALSE(this->Map.count(this->getKey())); + EXPECT_TRUE(this->Map.find(this->getKey()) == this->Map.end()); +#ifndef _MSC_VER + EXPECT_EQ(typename TypeParam::mapped_type(), + this->Map.lookup(this->getKey())); +#else + // MSVC, at least old versions, cannot parse the typename to disambiguate + // TypeParam::mapped_type as a type. However, because MSVC doesn't implement + // two-phase name lookup, it also doesn't require the typename. Deal with + // this mutual incompatibility through specialized code. 
+ EXPECT_EQ(TypeParam::mapped_type(), + this->Map.lookup(this->getKey())); +#endif } // Constant map tests -TEST_F(DenseMapTest, ConstEmptyMapTest) { - const DenseMap<uint32_t, uint32_t> & constUintMap = uintMap; - const DenseMap<uint32_t *, uint32_t *> & constUintPtrMap = uintPtrMap; - EXPECT_EQ(0u, constUintMap.size()); - EXPECT_EQ(0u, constUintPtrMap.size()); - EXPECT_TRUE(constUintMap.empty()); - EXPECT_TRUE(constUintPtrMap.empty()); - EXPECT_TRUE(constUintMap.begin() == constUintMap.end()); - EXPECT_TRUE(constUintPtrMap.begin() == constUintPtrMap.end()); +TYPED_TEST(DenseMapTest, ConstEmptyMapTest) { + const TypeParam &ConstMap = this->Map; + EXPECT_EQ(0u, ConstMap.size()); + EXPECT_TRUE(ConstMap.empty()); + EXPECT_TRUE(ConstMap.begin() == ConstMap.end()); } // A map with a single entry -TEST_F(DenseMapTest, SingleEntryMapTest) { - uintMap[0] = 1; +TYPED_TEST(DenseMapTest, SingleEntryMapTest) { + this->Map[this->getKey()] = this->getValue(); // Size tests - EXPECT_EQ(1u, uintMap.size()); - EXPECT_FALSE(uintMap.begin() == uintMap.end()); - EXPECT_FALSE(uintMap.empty()); + EXPECT_EQ(1u, this->Map.size()); + EXPECT_FALSE(this->Map.begin() == this->Map.end()); + EXPECT_FALSE(this->Map.empty()); // Iterator tests - DenseMap<uint32_t, uint32_t>::iterator it = uintMap.begin(); - EXPECT_EQ(0u, it->first); - EXPECT_EQ(1u, it->second); + typename TypeParam::iterator it = this->Map.begin(); + EXPECT_EQ(this->getKey(), it->first); + EXPECT_EQ(this->getValue(), it->second); ++it; - EXPECT_TRUE(it == uintMap.end()); + EXPECT_TRUE(it == this->Map.end()); // Lookup tests - EXPECT_TRUE(uintMap.count(0u)); - EXPECT_TRUE(uintMap.find(0u) == uintMap.begin()); - EXPECT_EQ(1u, uintMap.lookup(0u)); - EXPECT_EQ(1u, uintMap[0]); + EXPECT_TRUE(this->Map.count(this->getKey())); + EXPECT_TRUE(this->Map.find(this->getKey()) == this->Map.begin()); + EXPECT_EQ(this->getValue(), this->Map.lookup(this->getKey())); + EXPECT_EQ(this->getValue(), this->Map[this->getKey()]); } // Test clear() 
method -TEST_F(DenseMapTest, ClearTest) { - uintMap[0] = 1; - uintMap.clear(); +TYPED_TEST(DenseMapTest, ClearTest) { + this->Map[this->getKey()] = this->getValue(); + this->Map.clear(); - EXPECT_EQ(0u, uintMap.size()); - EXPECT_TRUE(uintMap.empty()); - EXPECT_TRUE(uintMap.begin() == uintMap.end()); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); + EXPECT_TRUE(this->Map.begin() == this->Map.end()); } // Test erase(iterator) method -TEST_F(DenseMapTest, EraseTest) { - uintMap[0] = 1; - uintMap.erase(uintMap.begin()); +TYPED_TEST(DenseMapTest, EraseTest) { + this->Map[this->getKey()] = this->getValue(); + this->Map.erase(this->Map.begin()); - EXPECT_EQ(0u, uintMap.size()); - EXPECT_TRUE(uintMap.empty()); - EXPECT_TRUE(uintMap.begin() == uintMap.end()); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); + EXPECT_TRUE(this->Map.begin() == this->Map.end()); } // Test erase(value) method -TEST_F(DenseMapTest, EraseTest2) { - uintMap[0] = 1; - uintMap.erase(0); +TYPED_TEST(DenseMapTest, EraseTest2) { + this->Map[this->getKey()] = this->getValue(); + this->Map.erase(this->getKey()); - EXPECT_EQ(0u, uintMap.size()); - EXPECT_TRUE(uintMap.empty()); - EXPECT_TRUE(uintMap.begin() == uintMap.end()); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); + EXPECT_TRUE(this->Map.begin() == this->Map.end()); } // Test insert() method -TEST_F(DenseMapTest, InsertTest) { - uintMap.insert(std::make_pair(0u, 1u)); - EXPECT_EQ(1u, uintMap.size()); - EXPECT_EQ(1u, uintMap[0]); +TYPED_TEST(DenseMapTest, InsertTest) { + this->Map.insert(std::make_pair(this->getKey(), this->getValue())); + EXPECT_EQ(1u, this->Map.size()); + EXPECT_EQ(this->getValue(), this->Map[this->getKey()]); } // Test copy constructor method -TEST_F(DenseMapTest, CopyConstructorTest) { - uintMap[0] = 1; - DenseMap<uint32_t, uint32_t> copyMap(uintMap); +TYPED_TEST(DenseMapTest, CopyConstructorTest) { + this->Map[this->getKey()] = this->getValue(); + TypeParam 
copyMap(this->Map); EXPECT_EQ(1u, copyMap.size()); - EXPECT_EQ(1u, copyMap[0]); + EXPECT_EQ(this->getValue(), copyMap[this->getKey()]); } // Test assignment operator method -TEST_F(DenseMapTest, AssignmentTest) { - uintMap[0] = 1; - DenseMap<uint32_t, uint32_t> copyMap = uintMap; +TYPED_TEST(DenseMapTest, AssignmentTest) { + this->Map[this->getKey()] = this->getValue(); + TypeParam copyMap = this->Map; EXPECT_EQ(1u, copyMap.size()); - EXPECT_EQ(1u, copyMap[0]); + EXPECT_EQ(this->getValue(), copyMap[this->getKey()]); +} + +// Test swap method +TYPED_TEST(DenseMapTest, SwapTest) { + this->Map[this->getKey()] = this->getValue(); + TypeParam otherMap; + + this->Map.swap(otherMap); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); + EXPECT_EQ(1u, otherMap.size()); + EXPECT_EQ(this->getValue(), otherMap[this->getKey()]); + + this->Map.swap(otherMap); + EXPECT_EQ(0u, otherMap.size()); + EXPECT_TRUE(otherMap.empty()); + EXPECT_EQ(1u, this->Map.size()); + EXPECT_EQ(this->getValue(), this->Map[this->getKey()]); + + // Make this more interesting by inserting 100 numbers into the map. 
+ for (int i = 0; i < 100; ++i) + this->Map[this->getKey(i)] = this->getValue(i); + + this->Map.swap(otherMap); + EXPECT_EQ(0u, this->Map.size()); + EXPECT_TRUE(this->Map.empty()); + EXPECT_EQ(100u, otherMap.size()); + for (int i = 0; i < 100; ++i) + EXPECT_EQ(this->getValue(i), otherMap[this->getKey(i)]); + + this->Map.swap(otherMap); + EXPECT_EQ(0u, otherMap.size()); + EXPECT_TRUE(otherMap.empty()); + EXPECT_EQ(100u, this->Map.size()); + for (int i = 0; i < 100; ++i) + EXPECT_EQ(this->getValue(i), this->Map[this->getKey(i)]); } // A more complex iteration test -TEST_F(DenseMapTest, IterationTest) { +TYPED_TEST(DenseMapTest, IterationTest) { bool visited[100]; + std::map<typename TypeParam::key_type, unsigned> visitedIndex; // Insert 100 numbers into the map for (int i = 0; i < 100; ++i) { visited[i] = false; - uintMap[i] = 3; + visitedIndex[this->getKey(i)] = i; + + this->Map[this->getKey(i)] = this->getValue(i); } // Iterate over all numbers and mark each one found. - for (DenseMap<uint32_t, uint32_t>::iterator it = uintMap.begin(); - it != uintMap.end(); ++it) { - visited[it->first] = true; - } + for (typename TypeParam::iterator it = this->Map.begin(); + it != this->Map.end(); ++it) + visited[visitedIndex[it->first]] = true; // Ensure every number was visited. - for (int i = 0; i < 100; ++i) { + for (int i = 0; i < 100; ++i) ASSERT_TRUE(visited[i]) << "Entry #" << i << " was never visited"; - } } // const_iterator test -TEST_F(DenseMapTest, ConstIteratorTest) { +TYPED_TEST(DenseMapTest, ConstIteratorTest) { // Check conversion from iterator to const_iterator. - DenseMap<uint32_t, uint32_t>::iterator it = uintMap.begin(); - DenseMap<uint32_t, uint32_t>::const_iterator cit(it); + typename TypeParam::iterator it = this->Map.begin(); + typename TypeParam::const_iterator cit(it); EXPECT_TRUE(it == cit); // Check copying of const_iterators. 
- DenseMap<uint32_t, uint32_t>::const_iterator cit2(cit); + typename TypeParam::const_iterator cit2(cit); EXPECT_TRUE(cit == cit2); } @@ -194,7 +307,7 @@ struct TestDenseMapInfo { }; // find_as() tests -TEST_F(DenseMapTest, FindAsTest) { +TEST(DenseMapCustomTest, FindAsTest) { DenseMap<unsigned, unsigned, TestDenseMapInfo> map; map[0] = 1; map[1] = 2; diff --git a/unittests/ADT/SmallBitVectorTest.cpp b/unittests/ADT/SmallBitVectorTest.cpp deleted file mode 100644 index c4dda9e88d..0000000000 --- a/unittests/ADT/SmallBitVectorTest.cpp +++ /dev/null @@ -1,196 +0,0 @@ -//===- llvm/unittest/ADT/SmallBitVectorTest.cpp - SmallBitVector tests ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallBitVector.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -TEST(SmallBitVectorTest, TrivialOperation) { - SmallBitVector Vec; - EXPECT_EQ(0U, Vec.count()); - EXPECT_EQ(0U, Vec.size()); - EXPECT_FALSE(Vec.any()); - EXPECT_TRUE(Vec.all()); - EXPECT_TRUE(Vec.none()); - EXPECT_TRUE(Vec.empty()); - - Vec.resize(5, true); - EXPECT_EQ(5U, Vec.count()); - EXPECT_EQ(5U, Vec.size()); - EXPECT_TRUE(Vec.any()); - EXPECT_TRUE(Vec.all()); - EXPECT_FALSE(Vec.none()); - EXPECT_FALSE(Vec.empty()); - - Vec.resize(11); - EXPECT_EQ(5U, Vec.count()); - EXPECT_EQ(11U, Vec.size()); - EXPECT_TRUE(Vec.any()); - EXPECT_FALSE(Vec.all()); - EXPECT_FALSE(Vec.none()); - EXPECT_FALSE(Vec.empty()); - - SmallBitVector Inv = ~Vec; - EXPECT_EQ(6U, Inv.count()); - EXPECT_EQ(11U, Inv.size()); - EXPECT_TRUE(Inv.any()); - EXPECT_FALSE(Inv.all()); - EXPECT_FALSE(Inv.none()); - EXPECT_FALSE(Inv.empty()); - - EXPECT_FALSE(Inv == Vec); - EXPECT_TRUE(Inv != Vec); - Vec = ~Vec; - EXPECT_TRUE(Inv == Vec); - EXPECT_FALSE(Inv != Vec); - - // Add some "interesting" data to Vec. 
- Vec.resize(23, true); - Vec.resize(25, false); - Vec.resize(26, true); - Vec.resize(29, false); - Vec.resize(33, true); - Vec.resize(57, false); - unsigned Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { - ++Count; - EXPECT_TRUE(Vec[i]); - EXPECT_TRUE(Vec.test(i)); - } - EXPECT_EQ(Count, Vec.count()); - EXPECT_EQ(Count, 23u); - EXPECT_FALSE(Vec[0]); - EXPECT_TRUE(Vec[32]); - EXPECT_FALSE(Vec[56]); - Vec.resize(61, false); - - SmallBitVector Copy = Vec; - SmallBitVector Alt(3, false); - Alt.resize(6, true); - std::swap(Alt, Vec); - EXPECT_TRUE(Copy == Alt); - EXPECT_TRUE(Vec.size() == 6); - EXPECT_TRUE(Vec.count() == 3); - EXPECT_TRUE(Vec.find_first() == 3); - std::swap(Copy, Vec); - - // Add some more "interesting" data. - Vec.resize(68, true); - Vec.resize(78, false); - Vec.resize(89, true); - Vec.resize(90, false); - Vec.resize(91, true); - Vec.resize(130, false); - Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { - ++Count; - EXPECT_TRUE(Vec[i]); - EXPECT_TRUE(Vec.test(i)); - } - EXPECT_EQ(Count, Vec.count()); - EXPECT_EQ(Count, 42u); - EXPECT_FALSE(Vec[0]); - EXPECT_TRUE(Vec[32]); - EXPECT_FALSE(Vec[60]); - EXPECT_FALSE(Vec[129]); - - Vec.flip(60); - EXPECT_TRUE(Vec[60]); - EXPECT_EQ(Count + 1, Vec.count()); - Vec.flip(60); - EXPECT_FALSE(Vec[60]); - EXPECT_EQ(Count, Vec.count()); - - Vec.reset(32); - EXPECT_FALSE(Vec[32]); - EXPECT_EQ(Count - 1, Vec.count()); - Vec.set(32); - EXPECT_TRUE(Vec[32]); - EXPECT_EQ(Count, Vec.count()); - - Vec.flip(); - EXPECT_EQ(Vec.size() - Count, Vec.count()); - - Vec.reset(); - EXPECT_EQ(0U, Vec.count()); - EXPECT_EQ(130U, Vec.size()); - EXPECT_FALSE(Vec.any()); - EXPECT_FALSE(Vec.all()); - EXPECT_TRUE(Vec.none()); - EXPECT_FALSE(Vec.empty()); - - Inv = ~SmallBitVector(); - EXPECT_EQ(0U, Inv.count()); - EXPECT_EQ(0U, Inv.size()); - EXPECT_FALSE(Inv.any()); - EXPECT_TRUE(Inv.all()); - EXPECT_TRUE(Inv.none()); - EXPECT_TRUE(Inv.empty()); - - Vec.clear(); - 
EXPECT_EQ(0U, Vec.count()); - EXPECT_EQ(0U, Vec.size()); - EXPECT_FALSE(Vec.any()); - EXPECT_TRUE(Vec.all()); - EXPECT_TRUE(Vec.none()); - EXPECT_TRUE(Vec.empty()); -} - -TEST(SmallBitVectorTest, CompoundAssignment) { - SmallBitVector A; - A.resize(10); - A.set(4); - A.set(7); - - SmallBitVector B; - B.resize(50); - B.set(5); - B.set(18); - - A |= B; - EXPECT_TRUE(A.test(4)); - EXPECT_TRUE(A.test(5)); - EXPECT_TRUE(A.test(7)); - EXPECT_TRUE(A.test(18)); - EXPECT_EQ(4U, A.count()); - EXPECT_EQ(50U, A.size()); - - B.resize(10); - B.set(); - B.reset(2); - B.reset(7); - A &= B; - EXPECT_FALSE(A.test(2)); - EXPECT_FALSE(A.test(7)); - EXPECT_EQ(2U, A.count()); - EXPECT_EQ(50U, A.size()); - - B.resize(100); - B.set(); - - A ^= B; - EXPECT_TRUE(A.test(2)); - EXPECT_TRUE(A.test(7)); - EXPECT_EQ(98U, A.count()); - EXPECT_EQ(100U, A.size()); -} - -TEST(SmallBitVectorTest, ProxyIndex) { - SmallBitVector Vec(3); - EXPECT_TRUE(Vec.none()); - Vec[0] = Vec[1] = Vec[2] = true; - EXPECT_EQ(Vec.size(), Vec.count()); - Vec[2] = Vec[1] = Vec[0] = false; - EXPECT_TRUE(Vec.none()); -} - -} diff --git a/unittests/ADT/SmallMapTest.cpp b/unittests/ADT/SmallMapTest.cpp deleted file mode 100644 index 5729717e0f..0000000000 --- a/unittests/ADT/SmallMapTest.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===- llvm/unittest/ADT/SmallMapTest.cpp ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// SmallMap unit tests. -// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" -#include "llvm/ADT/SmallMap.h" - -using namespace llvm; - -// SmallMap test. 
-TEST(SmallMapTest, GeneralTest) { - - int buf[10]; - - SmallMap<int *, int, 3> a; - SmallMap<int *, int, 3> b; - SmallMap<int *, int, 3>::iterator found; - std::pair<SmallMap<int *, int, 3>::iterator, bool> insRes; - SmallMap<int *, int, 3>::const_iterator foundc; - - a.insert(std::make_pair(&buf[0], 0)); - insRes = a.insert(std::make_pair(&buf[1], 1)); - EXPECT_TRUE(insRes.second); - - // Check insertion, looking up, and data editing in small mode. - insRes = a.insert(std::make_pair(&buf[1], 6)); - EXPECT_FALSE(insRes.second); - EXPECT_EQ(insRes.first->second, 1); - insRes.first->second = 5; - found = a.find(&buf[1]); - EXPECT_NE(found, a.end()); - EXPECT_EQ(found->second, 5); - a[&buf[1]] = 10; - EXPECT_EQ(found->second, 10); - // Check "not found" case. - found = a.find(&buf[8]); - EXPECT_EQ(found, a.end()); - - b.insert(std::make_pair(&buf[2], 2)); - - std::swap(a, b); - a.swap(b); - std::swap(a, b); - - EXPECT_EQ(1U, a.size()); - EXPECT_EQ(2U, b.size()); - EXPECT_TRUE(a.count(&buf[2])); - EXPECT_TRUE(b.count(&buf[0])); - EXPECT_TRUE(b.count(&buf[1])); - - insRes = b.insert(std::make_pair(&buf[3], 3)); - EXPECT_TRUE(insRes.second); - - // Check insertion, looking up, and data editing in big mode. - insRes = b.insert(std::make_pair(&buf[3], 6)); - EXPECT_FALSE(insRes.second); - EXPECT_EQ(insRes.first->second, 3); - insRes.first->second = 7; - found = b.find(&buf[3]); - EXPECT_EQ(found->second, 7); - b[&buf[3]] = 14; - EXPECT_EQ(found->second, 14); - // Check constant looking up. - foundc = b.find(&buf[3]); - EXPECT_EQ(foundc->first, &buf[3]); - EXPECT_EQ(foundc->second, 14); - // Check not found case. 
- found = b.find(&buf[8]); - EXPECT_EQ(found, b.end()); - - std::swap(a, b); - a.swap(b); - std::swap(a, b); - - EXPECT_EQ(3U, a.size()); - EXPECT_EQ(1U, b.size()); - EXPECT_TRUE(a.count(&buf[0])); - EXPECT_TRUE(a.count(&buf[1])); - EXPECT_TRUE(a.count(&buf[3])); - EXPECT_TRUE(b.count(&buf[2])); - EXPECT_EQ(b.find(&buf[2])->second, 2); - - std::swap(a, b); - a.swap(b); - std::swap(a, b); - - EXPECT_EQ(1U, a.size()); - EXPECT_EQ(3U, b.size()); - EXPECT_TRUE(a.count(&buf[2])); - EXPECT_TRUE(b.count(&buf[0])); - EXPECT_TRUE(b.count(&buf[1])); - EXPECT_TRUE(b.count(&buf[3])); - - a.insert(std::make_pair(&buf[4], 4)); - a.insert(std::make_pair(&buf[5], 5)); - a.insert(std::make_pair(&buf[6], 6)); - - std::swap(b, a); - - EXPECT_EQ(3U, a.size()); - EXPECT_EQ(4U, b.size()); - EXPECT_TRUE(b.count(&buf[2])); - EXPECT_TRUE(b.count(&buf[4])); - EXPECT_TRUE(b.count(&buf[5])); - EXPECT_TRUE(b.count(&buf[6])); - EXPECT_TRUE(a.count(&buf[0])); - EXPECT_TRUE(a.count(&buf[1])); - EXPECT_TRUE(a.count(&buf[3])); - - // Check findAndConstruct - SmallMap<int *, int, 3>::value_type Buf7; - Buf7 = a.FindAndConstruct(&buf[7]); - EXPECT_EQ(Buf7.second, 0); - - // Check increments - - SmallMap<int *, int, 2> c; - c.insert(std::make_pair(&buf[0], 0)); - c.insert(std::make_pair(&buf[1], 1)); - - // For small mode we know that flat array map is used and we know the - // order of items. - unsigned ii = 0; - for (SmallMap<int *, int, 2>::iterator i = c.begin(), e = c.end(); - i != e; ++i, ++ii) { - EXPECT_TRUE((i->first == &buf[0] && i->second == 0 && ii == 0) || - (i->first == &buf[1] && i->second == 1 && ii == 1)); - } - - // For big mode DenseMap is used and final order of items is undefined. 
- c.insert(std::make_pair(&buf[2], 2)); - for (SmallMap<int *, int, 2>::iterator i = c.begin(), e = c.end(); - i != e; ++i) { - EXPECT_TRUE((i->first == &buf[0] && i->second == 0) || - (i->first == &buf[1] && i->second == 1) || - (i->first == &buf[2] && i->second == 2)); - } - - // Check that iteration only visits elements that actually exist. - SmallMap<int, int, 8> d; - d[0] = 2; - d[1] = 3; - int counts[2] = { 0, 0 }; - for (SmallMap<int, int, 8>::iterator I = d.begin(), E = d.end(); I != E; - ++I) { - EXPECT_TRUE(I->first == 0 || I->first == 1); - EXPECT_TRUE(I->second == 2 || I->second == 3); - EXPECT_EQ(I->second, I->first + 2); - ++counts[I->first]; - } - EXPECT_EQ(counts[0], 1); - EXPECT_EQ(counts[1], 1); -} diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp index c2542d614e..d35e5bc08a 100644 --- a/unittests/ADT/SmallVectorTest.cpp +++ b/unittests/ADT/SmallVectorTest.cpp @@ -342,7 +342,9 @@ TEST_F(SmallVectorTest, InsertTest) { SCOPED_TRACE("InsertTest"); makeSequence(theVector, 1, 3); - theVector.insert(theVector.begin() + 1, Constructable(77)); + VectorType::iterator I = + theVector.insert(theVector.begin() + 1, Constructable(77)); + EXPECT_EQ(theVector.begin() + 1, I); assertValuesInOrder(theVector, 4u, 1, 77, 2, 3); } @@ -351,17 +353,48 @@ TEST_F(SmallVectorTest, InsertRepeatedTest) { SCOPED_TRACE("InsertRepeatedTest"); makeSequence(theVector, 10, 15); - theVector.insert(theVector.begin() + 1, 2, Constructable(16)); + VectorType::iterator I = + theVector.insert(theVector.begin() + 1, 2, Constructable(16)); + EXPECT_EQ(theVector.begin() + 1, I); assertValuesInOrder(theVector, 8u, 10, 16, 16, 11, 12, 13, 14, 15); + + // Insert at end. + I = theVector.insert(theVector.end(), 2, Constructable(16)); + EXPECT_EQ(theVector.begin() + 8, I); + assertValuesInOrder(theVector, 10u, 10, 16, 16, 11, 12, 13, 14, 15, 16, 16); + + // Empty insert. 
+ EXPECT_EQ(theVector.end(), + theVector.insert(theVector.end(), 0, Constructable(42))); + EXPECT_EQ(theVector.begin() + 1, + theVector.insert(theVector.begin() + 1, 0, Constructable(42))); } // Insert range. TEST_F(SmallVectorTest, InsertRangeTest) { - SCOPED_TRACE("InsertRepeatedTest"); + SCOPED_TRACE("InsertRangeTest"); + + Constructable Arr[3] = + { Constructable(77), Constructable(77), Constructable(77) }; makeSequence(theVector, 1, 3); - theVector.insert(theVector.begin() + 1, 3, Constructable(77)); + VectorType::iterator I = + theVector.insert(theVector.begin() + 1, Arr, Arr+3); + EXPECT_EQ(theVector.begin() + 1, I); assertValuesInOrder(theVector, 6u, 1, 77, 77, 77, 2, 3); + + // Insert at end. + I = theVector.insert(theVector.end(), Arr, Arr+3); + EXPECT_EQ(theVector.begin() + 6, I); + assertValuesInOrder(theVector, 9u, 1, 77, 77, 77, 2, 3, 77, 77, 77); + + // Empty insert. + EXPECT_EQ(theVector.end(), theVector.insert(theVector.end(), + theVector.begin(), + theVector.begin())); + EXPECT_EQ(theVector.begin() + 1, theVector.insert(theVector.begin() + 1, + theVector.begin(), + theVector.begin())); } // Comparison tests. diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp index 2ae58204e1..5bb65cbd7a 100644 --- a/unittests/ADT/StringMapTest.cpp +++ b/unittests/ADT/StringMapTest.cpp @@ -75,7 +75,6 @@ const std::string StringMapTest::testKeyStr(testKey); // Empty map tests. TEST_F(StringMapTest, EmptyMapTest) { - SCOPED_TRACE("EmptyMapTest"); assertEmptyMap(); } @@ -102,14 +101,12 @@ TEST_F(StringMapTest, ConstEmptyMapTest) { // A map with a single entry. TEST_F(StringMapTest, SingleEntryMapTest) { - SCOPED_TRACE("SingleEntryMapTest"); testMap[testKey] = testValue; assertSingleItemMap(); } // Test clear() method. TEST_F(StringMapTest, ClearTest) { - SCOPED_TRACE("ClearTest"); testMap[testKey] = testValue; testMap.clear(); assertEmptyMap(); @@ -117,7 +114,6 @@ TEST_F(StringMapTest, ClearTest) { // Test erase(iterator) method. 
TEST_F(StringMapTest, EraseIteratorTest) { - SCOPED_TRACE("EraseIteratorTest"); testMap[testKey] = testValue; testMap.erase(testMap.begin()); assertEmptyMap(); @@ -125,7 +121,6 @@ TEST_F(StringMapTest, EraseIteratorTest) { // Test erase(value) method. TEST_F(StringMapTest, EraseValueTest) { - SCOPED_TRACE("EraseValueTest"); testMap[testKey] = testValue; testMap.erase(testKey); assertEmptyMap(); @@ -133,13 +128,34 @@ TEST_F(StringMapTest, EraseValueTest) { // Test inserting two values and erasing one. TEST_F(StringMapTest, InsertAndEraseTest) { - SCOPED_TRACE("InsertAndEraseTest"); testMap[testKey] = testValue; testMap["otherKey"] = 2; testMap.erase("otherKey"); assertSingleItemMap(); } +TEST_F(StringMapTest, SmallFullMapTest) { + // StringMap has a tricky corner case when the map is small (<8 buckets) and + // it fills up through a balanced pattern of inserts and erases. This can + // lead to inf-loops in some cases (PR13148) so we test it explicitly here. + llvm::StringMap<int> Map(2); + + Map["eins"] = 1; + Map["zwei"] = 2; + Map["drei"] = 3; + Map.erase("drei"); + Map.erase("eins"); + Map["veir"] = 4; + Map["funf"] = 5; + + EXPECT_EQ(3u, Map.size()); + EXPECT_EQ(0, Map.lookup("eins")); + EXPECT_EQ(2, Map.lookup("zwei")); + EXPECT_EQ(0, Map.lookup("drei")); + EXPECT_EQ(4, Map.lookup("veir")); + EXPECT_EQ(5, Map.lookup("funf")); +} + // A more complex iteration test. TEST_F(StringMapTest, IterationTest) { bool visited[100]; diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt new file mode 100644 index 0000000000..7991a4101c --- /dev/null +++ b/unittests/Analysis/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS + Analysis + ) + +add_llvm_unittest(AnalysisTests + ScalarEvolutionTest.cpp + ) diff --git a/unittests/Analysis/Makefile b/unittests/Analysis/Makefile index f89240ec70..b548d25d1e 100644 --- a/unittests/Analysis/Makefile +++ b/unittests/Analysis/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. 
TESTNAME = Analysis -LINK_COMPONENTS := core support target analysis ipa +LINK_COMPONENTS := analysis include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest diff --git a/unittests/Bitcode/CMakeLists.txt b/unittests/Bitcode/CMakeLists.txt new file mode 100644 index 0000000000..d8f5fe1f19 --- /dev/null +++ b/unittests/Bitcode/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LLVM_LINK_COMPONENTS + BitReader + BitWriter + ) + +add_llvm_unittest(BitcodeTests + BitReaderTest.cpp + ) diff --git a/unittests/Bitcode/Makefile b/unittests/Bitcode/Makefile index aa437e7e2c..fcec87948f 100644 --- a/unittests/Bitcode/Makefile +++ b/unittests/Bitcode/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. TESTNAME = Bitcode -LINK_COMPONENTS := core support bitreader bitwriter +LINK_COMPONENTS := bitreader bitwriter include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 11a5a5ac97..84bd44439e 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -1,184 +1,14 @@ -function(add_llvm_unittest test_dirname) - string(REGEX MATCH "([^/]+)$" test_name ${test_dirname}) - if (CMAKE_BUILD_TYPE) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY - ${LLVM_BINARY_DIR}/unittests/${test_dirname}/${CMAKE_BUILD_TYPE}) - else() - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY - ${LLVM_BINARY_DIR}/unittests/${test_dirname}) - endif() - if( NOT LLVM_BUILD_TESTS ) - set(EXCLUDE_FROM_ALL ON) - endif() - add_llvm_executable(${test_name}Tests ${ARGN}) - add_dependencies(UnitTests ${test_name}Tests) - set_target_properties(${test_name}Tests PROPERTIES FOLDER "Tests") -endfunction() - -# Visual Studio 2012 only supports up to 8 template parameters in -# std::tr1::tuple by default, but gtest requires 10 -if(MSVC AND MSVC_VERSION EQUAL 1700) - add_definitions(-D_VARIADIC_MAX=10) -endif () - add_custom_target(UnitTests) set_target_properties(UnitTests PROPERTIES FOLDER "Tests") 
-include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) -add_definitions(-DGTEST_HAS_RTTI=0) -if( LLVM_COMPILER_IS_GCC_COMPATIBLE ) - llvm_replace_compiler_option(CMAKE_CXX_FLAGS "-frtti" "-fno-rtti") -elseif( MSVC ) - llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/GR" "/GR-") -endif() - -if (NOT LLVM_ENABLE_THREADS) - add_definitions(-DGTEST_HAS_PTHREAD=0) -endif() - -if(SUPPORTS_NO_VARIADIC_MACROS_FLAG) - add_definitions("-Wno-variadic-macros") -endif() - -set(LLVM_LINK_COMPONENTS - jit - interpreter - nativecodegen - BitWriter - BitReader - AsmParser - Core - Support - ) - -set(LLVM_USED_LIBS - gtest - gtest_main - LLVMSupport # gtest needs it for raw_ostream. - ) - -add_llvm_unittest(ADT - ADT/APFloatTest.cpp - ADT/APIntTest.cpp - ADT/BitVectorTest.cpp - ADT/DAGDeltaAlgorithmTest.cpp - ADT/DeltaAlgorithmTest.cpp - ADT/DenseMapTest.cpp - ADT/DenseSetTest.cpp - ADT/FoldingSet.cpp - ADT/HashingTest.cpp - ADT/ilistTest.cpp - ADT/ImmutableSetTest.cpp - ADT/IntEqClassesTest.cpp - ADT/IntervalMapTest.cpp - ADT/IntrusiveRefCntPtrTest.cpp - ADT/PackedVectorTest.cpp - ADT/SmallBitVectorTest.cpp - ADT/SmallStringTest.cpp - ADT/SmallVectorTest.cpp - ADT/SparseBitVectorTest.cpp - ADT/SparseSetTest.cpp - ADT/StringMapTest.cpp - ADT/StringRefTest.cpp - ADT/TripleTest.cpp - ADT/TwineTest.cpp - ADT/VariadicFunctionTest.cpp - ) - -add_llvm_unittest(Analysis - Analysis/ScalarEvolutionTest.cpp - ) - -add_llvm_unittest(ExecutionEngine - ExecutionEngine/ExecutionEngineTest.cpp - ) - -if( LLVM_USE_INTEL_JITEVENTS ) - include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ) - link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} ) - set(ProfileTestSources - ExecutionEngine/JIT/IntelJITEventListenerTest.cpp - ) - set(LLVM_LINK_COMPONENTS - ${LLVM_LINK_COMPONENTS} - IntelJITEvents - ) -endif( LLVM_USE_INTEL_JITEVENTS ) - -if( LLVM_USE_OPROFILE ) - set(ProfileTestSources - ${ProfileTestSources} - ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp - ) - 
set(LLVM_LINK_COMPONENTS - ${LLVM_LINK_COMPONENTS} - OProfileJIT - ) -endif( LLVM_USE_OPROFILE ) - -set(JITTestsSources - ExecutionEngine/JIT/JITEventListenerTest.cpp - ExecutionEngine/JIT/JITMemoryManagerTest.cpp - ExecutionEngine/JIT/JITTest.cpp - ExecutionEngine/JIT/MultiJITTest.cpp - ${ProfileTestSources} - ) - -if(MSVC) - list(APPEND JITTestsSources ExecutionEngine/JIT/JITTests.def) -endif() - -add_llvm_unittest(ExecutionEngine/JIT ${JITTestsSources}) - -if(MINGW OR CYGWIN) - set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols) -endif() - -add_llvm_unittest(Transforms/Utils - Transforms/Utils/Cloning.cpp - ) - -set(VMCoreSources - VMCore/ConstantsTest.cpp - VMCore/InstructionsTest.cpp - VMCore/MetadataTest.cpp - VMCore/PassManagerTest.cpp - VMCore/ValueMapTest.cpp - VMCore/VerifierTest.cpp - VMCore/DominatorTreeTest.cpp - ) - -# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug. -# See issue#331418 in Visual Studio. -if(MSVC AND MSVC_VERSION LESS 1600) - list(REMOVE_ITEM VMCoreSources VMCore/ValueMapTest.cpp) -endif() - -add_llvm_unittest(VMCore ${VMCoreSources}) - -add_llvm_unittest(Bitcode - Bitcode/BitReaderTest.cpp - ) - -set(LLVM_LINK_COMPONENTS - Support - Core - ) +function(add_llvm_unittest test_dirname) + add_unittest(UnitTests ${test_dirname} ${ARGN}) +endfunction() -add_llvm_unittest(Support - Support/AllocatorTest.cpp - Support/Casting.cpp - Support/CommandLineTest.cpp - Support/ConstantRangeTest.cpp - Support/EndianTest.cpp - Support/LeakDetectorTest.cpp - Support/MathExtrasTest.cpp - Support/Path.cpp - Support/raw_ostream_test.cpp - Support/RegexTest.cpp - Support/SwapByteOrderTest.cpp - Support/TimeValue.cpp - Support/TypeBuilderTest.cpp - Support/ValueHandleTest.cpp - Support/YAMLParserTest.cpp - ) +add_subdirectory(ADT) +add_subdirectory(Analysis) +add_subdirectory(ExecutionEngine) +add_subdirectory(Bitcode) +add_subdirectory(Support) +add_subdirectory(Transforms) +add_subdirectory(VMCore) diff --git 
a/unittests/ExecutionEngine/CMakeLists.txt b/unittests/ExecutionEngine/CMakeLists.txt new file mode 100644 index 0000000000..5fffadd4ca --- /dev/null +++ b/unittests/ExecutionEngine/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + interpreter + ) + +add_llvm_unittest(ExecutionEngineTests + ExecutionEngineTest.cpp + ) + +add_subdirectory(JIT) diff --git a/unittests/ExecutionEngine/JIT/CMakeLists.txt b/unittests/ExecutionEngine/JIT/CMakeLists.txt new file mode 100644 index 0000000000..d43d72de40 --- /dev/null +++ b/unittests/ExecutionEngine/JIT/CMakeLists.txt @@ -0,0 +1,57 @@ +set(LLVM_LINK_COMPONENTS + asmparser + bitreader + bitwriter + jit + nativecodegen + ) + +# HACK: Declare a couple of source files as optionally compiled to satisfy the +# missing-file-checker in LLVM's weird CMake build. +set(LLVM_OPTIONAL_SOURCES + IntelJITEventListenerTest.cpp + OProfileJITEventListenerTest.cpp + ) + +if( LLVM_USE_INTEL_JITEVENTS ) + include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ) + link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} ) + set(ProfileTestSources + IntelJITEventListenerTest.cpp + ) + set(LLVM_LINK_COMPONENTS + ${LLVM_LINK_COMPONENTS} + IntelJITEvents + ) +endif( LLVM_USE_INTEL_JITEVENTS ) + +if( LLVM_USE_OPROFILE ) + set(ProfileTestSources + ${ProfileTestSources} + OProfileJITEventListenerTest.cpp + ) + set(LLVM_LINK_COMPONENTS + ${LLVM_LINK_COMPONENTS} + OProfileJIT + ) +endif( LLVM_USE_OPROFILE ) + +set(JITTestsSources + JITEventListenerTest.cpp + JITMemoryManagerTest.cpp + JITTest.cpp + MultiJITTest.cpp + ${ProfileTestSources} + ) + +if(MSVC) + list(APPEND JITTestsSources JITTests.def) +endif() + +add_llvm_unittest(JITTests + ${JITTestsSources} + ) + +if(MINGW OR CYGWIN) + set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols) +endif() diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h index 53608cbfce..d669ecc03d 100644 --- 
a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h +++ b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h @@ -10,18 +10,18 @@ #ifndef JIT_EVENT_LISTENER_TEST_COMMON_H #define JIT_EVENT_LISTENER_TEST_COMMON_H -#include "llvm/Analysis/DIBuilder.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" #include "llvm/CodeGen/MachineCodeInfo.h" -#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JIT.h" #include "llvm/ExecutionEngine/JITEventListener.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/TypeBuilder.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TypeBuilder.h" +#include "llvm/Config/config.h" #include "gtest/gtest.h" diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp index dcc58d49c4..8780aa556c 100644 --- a/unittests/ExecutionEngine/JIT/JITTest.cpp +++ b/unittests/ExecutionEngine/JIT/JITTest.cpp @@ -7,29 +7,29 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Assembly/Parser.h" #include "llvm/BasicBlock.h" -#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Constant.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/ExecutionEngine/JIT.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" +#include "llvm/Type.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Assembly/Parser.h" +#include 
"llvm/Bitcode/ReaderWriter.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TypeBuilder.h" #include "llvm/Support/TargetSelect.h" -#include "llvm/Type.h" +#include "llvm/Support/TypeBuilder.h" +#include "gtest/gtest.h" #include <vector> using namespace llvm; diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile index c404fb002a..b535a6b296 100644 --- a/unittests/ExecutionEngine/JIT/Makefile +++ b/unittests/ExecutionEngine/JIT/Makefile @@ -9,7 +9,7 @@ LEVEL = ../../.. TESTNAME = JIT -LINK_COMPONENTS := asmparser bitreader bitwriter core jit native support +LINK_COMPONENTS := asmparser bitreader bitwriter jit native include $(LEVEL)/Makefile.config diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile index a0395cdad3..63508d2399 100644 --- a/unittests/ExecutionEngine/Makefile +++ b/unittests/ExecutionEngine/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. TESTNAME = ExecutionEngine -LINK_COMPONENTS := engine interpreter +LINK_COMPONENTS :=interpreter PARALLEL_DIRS = JIT include $(LEVEL)/Makefile.config diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp new file mode 100644 index 0000000000..c45db2cdc0 --- /dev/null +++ b/unittests/Support/AlignOfTest.cpp @@ -0,0 +1,328 @@ +//===- llvm/unittest/Support/AlignOfTest.cpp - Alignment utility tests ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" + +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +// Disable warnings about questionable type definitions. 
+// We're testing that even questionable types work with the alignment utilities. +#ifdef _MSC_VER +#pragma warning(disable:4584) +#endif + +// Define some fixed alignment types to use in these tests. +#if __cplusplus == 201103L || __has_feature(cxx_alignas) +typedef char alignas(1) A1; +typedef char alignas(2) A2; +typedef char alignas(4) A4; +typedef char alignas(8) A8; +#elif defined(__clang__) || defined(__GNUC__) +typedef char A1 __attribute__((aligned(1))); +typedef char A2 __attribute__((aligned(2))); +typedef char A4 __attribute__((aligned(4))); +typedef char A8 __attribute__((aligned(8))); +#elif defined(_MSC_VER) +typedef __declspec(align(1)) char A1; +typedef __declspec(align(2)) char A2; +typedef __declspec(align(4)) char A4; +typedef __declspec(align(8)) char A8; +#else +# error No supported align as directive. +#endif + +// Wrap the forced aligned types in structs to hack around compiler bugs. +struct SA1 { A1 a; }; +struct SA2 { A2 a; }; +struct SA4 { A4 a; }; +struct SA8 { A8 a; }; + +struct S1 {}; +struct S2 { char a; }; +struct S3 { int x; }; +struct S4 { double y; }; +struct S5 { A1 a1; A2 a2; A4 a4; A8 a8; }; +struct S6 { double f(); }; +struct D1 : S1 {}; +struct D2 : S6 { float g(); }; +struct D3 : S2 {}; +struct D4 : S2 { int x; }; +struct D5 : S3 { char c; }; +struct D6 : S2, S3 {}; +struct D7 : S1, S3 {}; +struct D8 : S1, D4, D5 { double x[2]; }; +struct D9 : S1, D1 { S1 s1; }; +struct V1 { virtual ~V1(); }; +struct V2 { int x; virtual ~V2(); }; +struct V3 : V1 { virtual ~V3(); }; +struct V4 : virtual V2 { int y; virtual ~V4(); }; +struct V5 : V4, V3 { double z; virtual ~V5(); }; +struct V6 : S1 { virtual ~V6(); }; +struct V7 : virtual V2, virtual V6 { virtual ~V7(); }; +struct V8 : V5, virtual V6, V7 { double zz; virtual ~V8(); }; + +// Ensure alignment is a compile-time constant. 
+char LLVM_ATTRIBUTE_UNUSED test_arr1 + [AlignOf<char>::Alignment > 0] + [AlignOf<short>::Alignment > 0] + [AlignOf<int>::Alignment > 0] + [AlignOf<long>::Alignment > 0] + [AlignOf<long long>::Alignment > 0] + [AlignOf<float>::Alignment > 0] + [AlignOf<double>::Alignment > 0] + [AlignOf<long double>::Alignment > 0] + [AlignOf<void *>::Alignment > 0] + [AlignOf<int *>::Alignment > 0] + [AlignOf<double (*)(double)>::Alignment > 0] + [AlignOf<double (S6::*)()>::Alignment > 0]; +char LLVM_ATTRIBUTE_UNUSED test_arr2 + [AlignOf<A1>::Alignment > 0] + [AlignOf<A2>::Alignment > 0] + [AlignOf<A4>::Alignment > 0] + [AlignOf<A8>::Alignment > 0] + [AlignOf<SA1>::Alignment > 0] + [AlignOf<SA2>::Alignment > 0] + [AlignOf<SA4>::Alignment > 0] + [AlignOf<SA8>::Alignment > 0]; +char LLVM_ATTRIBUTE_UNUSED test_arr3 + [AlignOf<S1>::Alignment > 0] + [AlignOf<S2>::Alignment > 0] + [AlignOf<S3>::Alignment > 0] + [AlignOf<S4>::Alignment > 0] + [AlignOf<S5>::Alignment > 0] + [AlignOf<S6>::Alignment > 0]; +char LLVM_ATTRIBUTE_UNUSED test_arr4 + [AlignOf<D1>::Alignment > 0] + [AlignOf<D2>::Alignment > 0] + [AlignOf<D3>::Alignment > 0] + [AlignOf<D4>::Alignment > 0] + [AlignOf<D5>::Alignment > 0] + [AlignOf<D6>::Alignment > 0] + [AlignOf<D7>::Alignment > 0] + [AlignOf<D8>::Alignment > 0] + [AlignOf<D9>::Alignment > 0]; +char LLVM_ATTRIBUTE_UNUSED test_arr5 + [AlignOf<V1>::Alignment > 0] + [AlignOf<V2>::Alignment > 0] + [AlignOf<V3>::Alignment > 0] + [AlignOf<V4>::Alignment > 0] + [AlignOf<V5>::Alignment > 0] + [AlignOf<V6>::Alignment > 0] + [AlignOf<V7>::Alignment > 0] + [AlignOf<V8>::Alignment > 0]; + +TEST(AlignOfTest, BasicAlignmentInvariants) { + // For a very strange reason, many compilers do not support this. Both Clang + // and GCC fail to align these properly. + EXPECT_EQ(1u, alignOf<A1>()); +#if 0 + EXPECT_EQ(2u, alignOf<A2>()); + EXPECT_EQ(4u, alignOf<A4>()); + EXPECT_EQ(8u, alignOf<A8>()); +#endif + + // But once wrapped in structs, the alignment is correctly managed. 
+ EXPECT_LE(1u, alignOf<SA1>()); + EXPECT_LE(2u, alignOf<SA2>()); + EXPECT_LE(4u, alignOf<SA4>()); + EXPECT_LE(8u, alignOf<SA8>()); + + EXPECT_EQ(1u, alignOf<char>()); + EXPECT_LE(alignOf<char>(), alignOf<short>()); + EXPECT_LE(alignOf<short>(), alignOf<int>()); + EXPECT_LE(alignOf<int>(), alignOf<long>()); + EXPECT_LE(alignOf<long>(), alignOf<long long>()); + EXPECT_LE(alignOf<char>(), alignOf<float>()); + EXPECT_LE(alignOf<float>(), alignOf<double>()); + EXPECT_LE(alignOf<char>(), alignOf<long double>()); + EXPECT_LE(alignOf<char>(), alignOf<void *>()); + EXPECT_EQ(alignOf<void *>(), alignOf<int *>()); + EXPECT_LE(alignOf<char>(), alignOf<S1>()); + EXPECT_LE(alignOf<S1>(), alignOf<S2>()); + EXPECT_LE(alignOf<S1>(), alignOf<S3>()); + EXPECT_LE(alignOf<S1>(), alignOf<S4>()); + EXPECT_LE(alignOf<S1>(), alignOf<S5>()); + EXPECT_LE(alignOf<S1>(), alignOf<S6>()); + EXPECT_LE(alignOf<S1>(), alignOf<D1>()); + EXPECT_LE(alignOf<S1>(), alignOf<D2>()); + EXPECT_LE(alignOf<S1>(), alignOf<D3>()); + EXPECT_LE(alignOf<S1>(), alignOf<D4>()); + EXPECT_LE(alignOf<S1>(), alignOf<D5>()); + EXPECT_LE(alignOf<S1>(), alignOf<D6>()); + EXPECT_LE(alignOf<S1>(), alignOf<D7>()); + EXPECT_LE(alignOf<S1>(), alignOf<D8>()); + EXPECT_LE(alignOf<S1>(), alignOf<D9>()); + EXPECT_LE(alignOf<S1>(), alignOf<V1>()); + EXPECT_LE(alignOf<V1>(), alignOf<V2>()); + EXPECT_LE(alignOf<V1>(), alignOf<V3>()); + EXPECT_LE(alignOf<V1>(), alignOf<V4>()); + EXPECT_LE(alignOf<V1>(), alignOf<V5>()); + EXPECT_LE(alignOf<V1>(), alignOf<V6>()); + EXPECT_LE(alignOf<V1>(), alignOf<V7>()); + EXPECT_LE(alignOf<V1>(), alignOf<V8>()); +} + +TEST(AlignOfTest, BasicAlignedArray) { + // Note: this code exclusively uses the struct-wrapped arbitrarily aligned + // types because of the bugs mentioned above where GCC and Clang both + // disregard the arbitrary alignment specifier until the type is used to + // declare a member of a struct. 
+ EXPECT_LE(1u, alignOf<AlignedCharArray<SA1>::union_type>()); + EXPECT_LE(2u, alignOf<AlignedCharArray<SA2>::union_type>()); + EXPECT_LE(4u, alignOf<AlignedCharArray<SA4>::union_type>()); + EXPECT_LE(8u, alignOf<AlignedCharArray<SA8>::union_type>()); + + EXPECT_LE(1u, sizeof(AlignedCharArray<SA1>::union_type)); + EXPECT_LE(2u, sizeof(AlignedCharArray<SA2>::union_type)); + EXPECT_LE(4u, sizeof(AlignedCharArray<SA4>::union_type)); + EXPECT_LE(8u, sizeof(AlignedCharArray<SA8>::union_type)); + + EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1>::union_type>())); + EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1, SA2>::union_type>())); + EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1, SA2, SA4>::union_type>())); + EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1, SA2, SA4, SA8>::union_type>())); + + EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1>::union_type)); + EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1, SA2>::union_type)); + EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1, SA2, SA4>::union_type)); + EXPECT_EQ(8u, sizeof(AlignedCharArray<SA1, SA2, SA4, SA8>::union_type)); + + EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1[1]>::union_type>())); + EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1[2], SA2[1]>::union_type>())); + EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1[42], SA2[55], + SA4[13]>::union_type>())); + EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1[2], SA2[1], + SA4, SA8>::union_type>())); + + EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1[1]>::union_type)); + EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1[2], SA2[1]>::union_type)); + EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1[3], SA2[2], SA4>::union_type)); + EXPECT_EQ(16u, sizeof(AlignedCharArray<SA1, SA2[3], + SA4[3], SA8>::union_type)); + + // For other tests we simply assert that the alignment of the union mathes + // that of the fundamental type and hope that we have any weird type + // productions that would trigger bugs. 
+ EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArray<char>::union_type>()); + EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArray<short>::union_type>()); + EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArray<int>::union_type>()); + EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArray<long>::union_type>()); + EXPECT_EQ(alignOf<long long>(), + alignOf<AlignedCharArray<long long>::union_type>()); + EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArray<float>::union_type>()); + EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArray<double>::union_type>()); + EXPECT_EQ(alignOf<long double>(), + alignOf<AlignedCharArray<long double>::union_type>()); + EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArray<void *>::union_type>()); + EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArray<int *>::union_type>()); + EXPECT_EQ(alignOf<double (*)(double)>(), + alignOf<AlignedCharArray<double (*)(double)>::union_type>()); + EXPECT_EQ(alignOf<double (S6::*)()>(), + alignOf<AlignedCharArray<double (S6::*)()>::union_type>()); + EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArray<S1>::union_type>()); + EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArray<S2>::union_type>()); + EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArray<S3>::union_type>()); + EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArray<S4>::union_type>()); + EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArray<S5>::union_type>()); + EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArray<S6>::union_type>()); + EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArray<D1>::union_type>()); + EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArray<D2>::union_type>()); + EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArray<D3>::union_type>()); + EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArray<D4>::union_type>()); + EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArray<D5>::union_type>()); + EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArray<D6>::union_type>()); + EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArray<D7>::union_type>()); + EXPECT_EQ(alignOf<D8>(), 
alignOf<AlignedCharArray<D8>::union_type>()); + EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArray<D9>::union_type>()); + EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArray<V1>::union_type>()); + EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArray<V2>::union_type>()); + EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArray<V3>::union_type>()); + EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArray<V4>::union_type>()); + EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArray<V5>::union_type>()); + EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArray<V6>::union_type>()); + EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArray<V7>::union_type>()); + + // Some versions of MSVC get this wrong somewhat disturbingly. The failure + // appears to be benign: alignOf<V8>() produces a preposterous value: 12 +#ifndef _MSC_VER + EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArray<V8>::union_type>()); +#endif + + EXPECT_EQ(sizeof(char), sizeof(AlignedCharArray<char>::union_type)); + EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArray<char[1]>::union_type)); + EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArray<char[2]>::union_type)); + EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArray<char[3]>::union_type)); + EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArray<char[4]>::union_type)); + EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArray<char[5]>::union_type)); + EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArray<char[8]>::union_type)); + EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArray<char[13]>::union_type)); + EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArray<char[16]>::union_type)); + EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArray<char[21]>::union_type)); + EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArray<char[32]>::union_type)); + EXPECT_EQ(sizeof(short), sizeof(AlignedCharArray<short>::union_type)); + EXPECT_EQ(sizeof(int), sizeof(AlignedCharArray<int>::union_type)); + EXPECT_EQ(sizeof(long), sizeof(AlignedCharArray<long>::union_type)); + EXPECT_EQ(sizeof(long long), + 
sizeof(AlignedCharArray<long long>::union_type)); + EXPECT_EQ(sizeof(float), sizeof(AlignedCharArray<float>::union_type)); + EXPECT_EQ(sizeof(double), sizeof(AlignedCharArray<double>::union_type)); + EXPECT_EQ(sizeof(long double), + sizeof(AlignedCharArray<long double>::union_type)); + EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArray<void *>::union_type)); + EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArray<int *>::union_type)); + EXPECT_EQ(sizeof(double (*)(double)), + sizeof(AlignedCharArray<double (*)(double)>::union_type)); + EXPECT_EQ(sizeof(double (S6::*)()), + sizeof(AlignedCharArray<double (S6::*)()>::union_type)); + EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArray<S1>::union_type)); + EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArray<S2>::union_type)); + EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArray<S3>::union_type)); + EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArray<S4>::union_type)); + EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArray<S5>::union_type)); + EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArray<S6>::union_type)); + EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArray<D1>::union_type)); + EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArray<D2>::union_type)); + EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArray<D3>::union_type)); + EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArray<D4>::union_type)); + EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArray<D5>::union_type)); + EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArray<D6>::union_type)); + EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArray<D7>::union_type)); + EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArray<D8>::union_type)); + EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArray<D9>::union_type)); + EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArray<D9[1]>::union_type)); + EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArray<D9[2]>::union_type)); + EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArray<D9[3]>::union_type)); + EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArray<D9[4]>::union_type)); + EXPECT_EQ(sizeof(D9[5]), 
sizeof(AlignedCharArray<D9[5]>::union_type)); + EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArray<D9[8]>::union_type)); + EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArray<D9[13]>::union_type)); + EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArray<D9[16]>::union_type)); + EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArray<D9[21]>::union_type)); + EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArray<D9[32]>::union_type)); + EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArray<V1>::union_type)); + EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArray<V2>::union_type)); + EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArray<V3>::union_type)); + EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArray<V4>::union_type)); + EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArray<V5>::union_type)); + EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArray<V6>::union_type)); + EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArray<V7>::union_type)); + + // Some versions of MSVC also get this wrong. The failure again appears to be + // benign: sizeof(V8) is only 52 bytes, but our array reserves 56. 
+#ifndef _MSC_VER + EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArray<V8>::union_type)); +#endif +} + +} diff --git a/unittests/Support/BlockFrequencyTest.cpp b/unittests/Support/BlockFrequencyTest.cpp index df256424b8..9c5bd7b893 100644 --- a/unittests/Support/BlockFrequencyTest.cpp +++ b/unittests/Support/BlockFrequencyTest.cpp @@ -34,7 +34,7 @@ TEST(BlockFrequencyTest, MaxToHalfMax) { BlockFrequency Freq(UINT64_MAX); BranchProbability Prob(UINT32_MAX / 2, UINT32_MAX); Freq *= Prob; - EXPECT_EQ(Freq.getFrequency(), 9223372034707292159LLu); + EXPECT_EQ(Freq.getFrequency(), 9223372034707292159ULL); } TEST(BlockFrequencyTest, BigToBig) { diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt new file mode 100644 index 0000000000..6053994d0c --- /dev/null +++ b/unittests/Support/CMakeLists.txt @@ -0,0 +1,29 @@ +set(LLVM_LINK_COMPONENTS + Support + Core + ) + +add_llvm_unittest(SupportTests + AlignOfTest.cpp + AllocatorTest.cpp + BlockFrequencyTest.cpp + Casting.cpp + CommandLineTest.cpp + ConstantRangeTest.cpp + DataExtractorTest.cpp + EndianTest.cpp + IntegersSubsetTest.cpp + IRBuilderTest.cpp + LeakDetectorTest.cpp + ManagedStatic.cpp + MathExtrasTest.cpp + MDBuilderTest.cpp + Path.cpp + raw_ostream_test.cpp + RegexTest.cpp + SwapByteOrderTest.cpp + TimeValue.cpp + TypeBuilderTest.cpp + ValueHandleTest.cpp + YAMLParserTest.cpp + ) diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp index 5fcdcfd2b4..72540c6999 100644 --- a/unittests/Support/ConstantRangeTest.cpp +++ b/unittests/Support/ConstantRangeTest.cpp @@ -234,9 +234,39 @@ TEST_F(ConstantRangeTest, IntersectWith) { EXPECT_TRUE(LHS.intersectWith(RHS) == LHS); // previous bug: intersection of [min, 3) and [2, max) should be 2 - LHS = ConstantRange(APInt(32, -2147483648), APInt(32, 3)); - RHS = ConstantRange(APInt(32, 2), APInt(32, 2147483648)); + LHS = ConstantRange(APInt(32, -2147483646), APInt(32, 3)); + RHS = ConstantRange(APInt(32, 2), 
APInt(32, 2147483646)); EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 2))); + + // [2, 0) /\ [4, 3) = [2, 0) + LHS = ConstantRange(APInt(32, 2), APInt(32, 0)); + RHS = ConstantRange(APInt(32, 4), APInt(32, 3)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 2), APInt(32, 0))); + + // [2, 0) /\ [4, 2) = [4, 0) + LHS = ConstantRange(APInt(32, 2), APInt(32, 0)); + RHS = ConstantRange(APInt(32, 4), APInt(32, 2)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 4), APInt(32, 0))); + + // [4, 2) /\ [5, 1) = [5, 1) + LHS = ConstantRange(APInt(32, 4), APInt(32, 2)); + RHS = ConstantRange(APInt(32, 5), APInt(32, 1)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 5), APInt(32, 1))); + + // [2, 0) /\ [7, 4) = [7, 4) + LHS = ConstantRange(APInt(32, 2), APInt(32, 0)); + RHS = ConstantRange(APInt(32, 7), APInt(32, 4)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 7), APInt(32, 4))); + + // [4, 2) /\ [1, 0) = [1, 0) + LHS = ConstantRange(APInt(32, 4), APInt(32, 2)); + RHS = ConstantRange(APInt(32, 1), APInt(32, 0)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 4), APInt(32, 2))); + + // [15, 0) /\ [7, 6) = [15, 0) + LHS = ConstantRange(APInt(32, 15), APInt(32, 0)); + RHS = ConstantRange(APInt(32, 7), APInt(32, 6)); + EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 15), APInt(32, 0))); } TEST_F(ConstantRangeTest, UnionWith) { @@ -259,6 +289,23 @@ TEST_F(ConstantRangeTest, UnionWith) { ConstantRange(16)); } +TEST_F(ConstantRangeTest, SetDifference) { + EXPECT_EQ(Full.difference(Empty), Full); + EXPECT_EQ(Full.difference(Full), Empty); + EXPECT_EQ(Empty.difference(Empty), Empty); + EXPECT_EQ(Empty.difference(Full), Empty); + + ConstantRange A(APInt(16, 3), APInt(16, 7)); + ConstantRange B(APInt(16, 5), APInt(16, 9)); + ConstantRange C(APInt(16, 3), APInt(16, 5)); + ConstantRange D(APInt(16, 7), APInt(16, 9)); + ConstantRange E(APInt(16, 5), APInt(16, 4)); + ConstantRange F(APInt(16, 7), 
APInt(16, 3)); + EXPECT_EQ(A.difference(B), C); + EXPECT_EQ(B.difference(A), D); + EXPECT_EQ(E.difference(A), F); +} + TEST_F(ConstantRangeTest, SubtractAPInt) { EXPECT_EQ(Full.subtract(APInt(16, 4)), Full); EXPECT_EQ(Empty.subtract(APInt(16, 4)), Empty); diff --git a/unittests/Support/IRBuilderTest.cpp b/unittests/Support/IRBuilderTest.cpp index b15de9ed38..56b9f15462 100644 --- a/unittests/Support/IRBuilderTest.cpp +++ b/unittests/Support/IRBuilderTest.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/IRBuilder.h" #include "llvm/BasicBlock.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -20,6 +20,7 @@ using namespace llvm; namespace { + class IRBuilderTest : public testing::Test { protected: virtual void SetUp() { @@ -38,7 +39,6 @@ protected: OwningPtr<Module> M; BasicBlock *BB; }; -} TEST_F(IRBuilderTest, Lifetime) { IRBuilder<> Builder(BB); @@ -70,3 +70,5 @@ TEST_F(IRBuilderTest, Lifetime) { ASSERT_TRUE(II_End1 != NULL); EXPECT_EQ(II_End1->getIntrinsicID(), Intrinsic::lifetime_end); } + +} diff --git a/unittests/Support/IntegersSubsetTest.cpp b/unittests/Support/IntegersSubsetTest.cpp index 471df5d0cb..5d1dde4c37 100644 --- a/unittests/Support/IntegersSubsetTest.cpp +++ b/unittests/Support/IntegersSubsetTest.cpp @@ -21,6 +21,7 @@ namespace { class Int : public APInt { public: + Int() {} Int(uint64_t V) : APInt(64, V) {} Int(const APInt& Src) : APInt(Src) {} bool operator < (const APInt& RHS) const { return ult(RHS); } @@ -179,177 +180,149 @@ namespace { } } - TEST(IntegersSubsetTest, ExcludeTest) { - std::vector<Range> Ranges; - Ranges.reserve(3); - - Mapping TheMapping; - - // Test case - // { {0, 4}, {7, 10} {13, 17} } - // sub - // { {3, 14} } - // = - // { {0, 2}, {15, 17} } - - Ranges.push_back(Range(Int(0), Int(4))); - Ranges.push_back(Range(Int(7), Int(10))); - 
Ranges.push_back(Range(Int(13), Int(17))); - - Subset TheSubset(Ranges); - - TheMapping.add(TheSubset); - - Ranges.clear(); - Ranges.push_back(Range(Int(3), Int(14))); - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); - - TheSubset = TheMapping.getCase(); - - EXPECT_EQ(TheSubset.getNumItems(), 2ULL); - EXPECT_EQ(TheSubset.getItem(0), Range(Int(0), Int(2))); - EXPECT_EQ(TheSubset.getItem(1), Range(Int(15), Int(17))); - - // Test case - // { {0, 4}, {7, 10} {13, 17} } - // sub - // { {0, 4}, {13, 17} } - // = - // { {7, 10 } - - Ranges.clear(); - Ranges.push_back(Range(Int(0), Int(4))); - Ranges.push_back(Range(Int(7), Int(10))); - Ranges.push_back(Range(Int(13), Int(17))); - - TheSubset = Subset(Ranges); - - TheMapping.clear(); - TheMapping.add(TheSubset); + typedef unsigned unsigned_pair[2]; + typedef unsigned_pair unsigned_ranges[]; + + void TestDiff( + const unsigned_ranges LHS, + unsigned LSize, + const unsigned_ranges RHS, + unsigned RSize, + const unsigned_ranges ExcludeRes, + unsigned ExcludeResSize, + const unsigned_ranges IntersectRes, + unsigned IntersectResSize + ) { + + Mapping::RangesCollection Ranges; + + Mapping LHSMapping; + for (unsigned i = 0; i < LSize; ++i) + Ranges.push_back(Range(Int(LHS[i][0]), Int(LHS[i][1]))); + LHSMapping.add(Ranges); Ranges.clear(); - Ranges.push_back(Range(Int(0), Int(4))); - Ranges.push_back(Range(Int(13), Int(17))); - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); - - TheSubset = TheMapping.getCase(); - - EXPECT_EQ(TheSubset.getNumItems(), 1ULL); - EXPECT_EQ(TheSubset.getItem(0), Range(Int(7), Int(10))); + Mapping RHSMapping; + for (unsigned i = 0; i < RSize; ++i) + Ranges.push_back(Range(Int(RHS[i][0]), Int(RHS[i][1]))); + RHSMapping.add(Ranges); + + Mapping LExclude, Intersection; + + LHSMapping.diff(&LExclude, &Intersection, 0, RHSMapping); + + if (ExcludeResSize) { + EXPECT_EQ(LExclude.size(), ExcludeResSize); + + unsigned i = 0; + for (Mapping::RangeIterator rei = LExclude.begin(), + 
e = LExclude.end(); rei != e; ++rei, ++i) + EXPECT_EQ(rei->first, Range(ExcludeRes[i][0], ExcludeRes[i][1])); + } else + EXPECT_TRUE(LExclude.empty()); + + if (IntersectResSize) { + EXPECT_EQ(Intersection.size(), IntersectResSize); + + unsigned i = 0; + for (Mapping::RangeIterator ii = Intersection.begin(), + e = Intersection.end(); ii != e; ++ii, ++i) + EXPECT_EQ(ii->first, Range(IntersectRes[i][0], IntersectRes[i][1])); + } else + EXPECT_TRUE(Intersection.empty()); - // Test case - // { {0, 17} } - // sub - // { {1, 5}, {10, 12}, {15, 16} } - // = - // { {0}, {6, 9}, {13, 14}, {17} } - - Ranges.clear(); - Ranges.push_back(Range(Int(0), Int(17))); - - TheSubset = Subset(Ranges); + LExclude.clear(); + Intersection.clear(); + RHSMapping.diff(0, &Intersection, &LExclude, LHSMapping); + + // Check LExclude again. + if (ExcludeResSize) { + EXPECT_EQ(LExclude.size(), ExcludeResSize); + + unsigned i = 0; + for (Mapping::RangeIterator rei = LExclude.begin(), + e = LExclude.end(); rei != e; ++rei, ++i) + EXPECT_EQ(rei->first, Range(ExcludeRes[i][0], ExcludeRes[i][1])); + } else + EXPECT_TRUE(LExclude.empty()); + } + + TEST(IntegersSubsetTest, DiffTest) { - TheMapping.clear(); - TheMapping.add(TheSubset); + static const unsigned NOT_A_NUMBER = 0xffff; - Ranges.clear(); - Ranges.push_back(Range(Int(1), Int(5))); - Ranges.push_back(Range(Int(10), Int(12))); - Ranges.push_back(Range(Int(15), Int(16))); + { + unsigned_ranges LHS = { { 0, 4 }, { 7, 10 }, { 13, 17 } }; + unsigned_ranges RHS = { { 3, 14 } }; + unsigned_ranges ExcludeRes = { { 0, 2 }, { 15, 17 } }; + unsigned_ranges IntersectRes = { { 3, 4 }, { 7, 10 }, { 13, 14 } }; - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); - - TheSubset = TheMapping.getCase(); - - EXPECT_EQ(TheSubset.getNumItems(), 4ULL); - EXPECT_EQ(TheSubset.getItem(0), Range(Int(0))); - EXPECT_EQ(TheSubset.getItem(1), Range(Int(6), Int(9))); - EXPECT_EQ(TheSubset.getItem(2), Range(Int(13), Int(14))); - EXPECT_EQ(TheSubset.getItem(3), 
Range(Int(17))); - - // Test case - // { {2, 4} } - // sub - // { {0, 5} } - // = - // { empty } - - Ranges.clear(); - Ranges.push_back(Range(Int(2), Int(4))); - - TheSubset = Subset(Ranges); - - TheMapping.clear(); - TheMapping.add(TheSubset); - - Ranges.clear(); - Ranges.push_back(Range(Int(0), Int(5))); + TestDiff(LHS, 3, RHS, 1, ExcludeRes, 2, IntersectRes, 3); + } - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); - - EXPECT_TRUE(TheMapping.empty()); - - // Test case - // { {2, 4} } - // sub - // { {7, 8} } - // = - // { {2, 4} } - - Ranges.clear(); - Ranges.push_back(Range(Int(2), Int(4))); - - TheSubset = Subset(Ranges); - - TheMapping.clear(); - TheMapping.add(TheSubset); - - Ranges.clear(); - Ranges.push_back(Range(Int(7), Int(8))); + { + unsigned_ranges LHS = { { 0, 4 }, { 7, 10 }, { 13, 17 } }; + unsigned_ranges RHS = { { 0, 4 }, { 13, 17 } }; + unsigned_ranges ExcludeRes = { { 7, 10 } }; + unsigned_ranges IntersectRes = { { 0, 4 }, { 13, 17 } }; - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); - - TheSubset = TheMapping.getCase(); - - EXPECT_EQ(TheSubset.getNumItems(), 1ULL); - EXPECT_EQ(TheSubset.getItem(0), Range(Int(2), Int(4))); - - // Test case - // { {3, 7} } - // sub - // { {1, 4} } - // = - // { {5, 7} } - - Ranges.clear(); - Ranges.push_back(Range(Int(3), Int(7))); - - TheSubset = Subset(Ranges); - - TheMapping.clear(); - TheMapping.add(TheSubset); - - Ranges.clear(); - Ranges.push_back(Range(Int(1), Int(4))); + TestDiff(LHS, 3, RHS, 2, ExcludeRes, 1, IntersectRes, 2); + } - TheSubset = Subset(Ranges); - - TheMapping.exclude(TheSubset); + { + unsigned_ranges LHS = { { 0, 17 } }; + unsigned_ranges RHS = { { 1, 5 }, { 10, 12 }, { 15, 16 } }; + unsigned_ranges ExcludeRes = + { { 0, 0 }, { 6, 9 }, { 13, 14 }, { 17, 17 } }; + unsigned_ranges IntersectRes = { { 1, 5 }, { 10, 12 }, { 15, 16 } }; + + TestDiff(LHS, 1, RHS, 3, ExcludeRes, 4, IntersectRes, 3); + } + + { + unsigned_ranges LHS = { { 2, 4 } }; + unsigned_ranges 
RHS = { { 0, 5 } }; + unsigned_ranges ExcludeRes = { {NOT_A_NUMBER, NOT_A_NUMBER} }; + unsigned_ranges IntersectRes = { { 2, 4 } }; + + TestDiff(LHS, 1, RHS, 1, ExcludeRes, 0, IntersectRes, 1); + } + + { + unsigned_ranges LHS = { { 2, 4 } }; + unsigned_ranges RHS = { { 7, 8 } }; + unsigned_ranges ExcludeRes = { { 2, 4 } }; + unsigned_ranges IntersectRes = { {NOT_A_NUMBER, NOT_A_NUMBER} }; + + TestDiff(LHS, 1, RHS, 1, ExcludeRes, 1, IntersectRes, 0); + } + + { + unsigned_ranges LHS = { { 3, 7 } }; + unsigned_ranges RHS = { { 1, 4 } }; + unsigned_ranges ExcludeRes = { { 5, 7 } }; + unsigned_ranges IntersectRes = { { 3, 4 } }; + + TestDiff(LHS, 1, RHS, 1, ExcludeRes, 1, IntersectRes, 1); + } - TheSubset = TheMapping.getCase(); + { + unsigned_ranges LHS = { { 0, 7 } }; + unsigned_ranges RHS = { { 0, 5 }, { 6, 9 } }; + unsigned_ranges ExcludeRes = { {NOT_A_NUMBER, NOT_A_NUMBER} }; + unsigned_ranges IntersectRes = { { 0, 5 }, {6, 7} }; + + TestDiff(LHS, 1, RHS, 2, ExcludeRes, 0, IntersectRes, 2); + } - EXPECT_EQ(TheSubset.getNumItems(), 1ULL); - EXPECT_EQ(TheSubset.getItem(0), Range(Int(5), Int(7))); - } + { + unsigned_ranges LHS = { { 17, 17 } }; + unsigned_ranges RHS = { { 4, 4 } }; + unsigned_ranges ExcludeRes = { {17, 17} }; + unsigned_ranges IntersectRes = { { NOT_A_NUMBER, NOT_A_NUMBER } }; + + TestDiff(LHS, 1, RHS, 1, ExcludeRes, 1, IntersectRes, 0); + } + } } diff --git a/unittests/Support/MDBuilderTest.cpp b/unittests/Support/MDBuilderTest.cpp index d54c7e8e8d..af3f4348ff 100644 --- a/unittests/Support/MDBuilderTest.cpp +++ b/unittests/Support/MDBuilderTest.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" +#include "llvm/IRBuilder.h" #include "llvm/Operator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MDBuilder.h" + +#include "gtest/gtest.h" + using namespace llvm; namespace { diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index 
358dad0f83..a071a5a8d6 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -312,4 +312,72 @@ TEST_F(FileSystemTest, Magic) { } } +#if !defined(_WIN32) // FIXME: Win32 has different permission schema. +TEST_F(FileSystemTest, Permissions) { + // Create a temp file. + int FileDescriptor; + SmallString<64> TempPath; + ASSERT_NO_ERROR( + fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath)); + + // Mark file as read-only + const fs::perms AllWrite = fs::owner_write|fs::group_write|fs::others_write; + ASSERT_NO_ERROR(fs::permissions(Twine(TempPath), fs::remove_perms|AllWrite)); + + // Verify file is read-only + fs::file_status Status; + ASSERT_NO_ERROR(fs::status(Twine(TempPath), Status)); + bool AnyWriteBits = (Status.permissions() & AllWrite); + EXPECT_FALSE(AnyWriteBits); + + // Mark file as read-write + ASSERT_NO_ERROR(fs::permissions(Twine(TempPath), fs::add_perms|AllWrite)); + + // Verify file is read-write + ASSERT_NO_ERROR(fs::status(Twine(TempPath), Status)); + AnyWriteBits = (Status.permissions() & AllWrite); + EXPECT_TRUE(AnyWriteBits); +} +#endif + +#if !defined(_WIN32) // FIXME: temporary suppressed. +TEST_F(FileSystemTest, FileMapping) { + // Create a temp file. 
+ int FileDescriptor; + SmallString<64> TempPath; + ASSERT_NO_ERROR( + fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath)); + + // Grow temp file to be 4096 bytes + ASSERT_NO_ERROR(sys::fs::resize_file(Twine(TempPath), 4096)); + + // Map in temp file and add some content + void* MappedMemory; + ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, + true /*writable*/, MappedMemory)); + char* Memory = reinterpret_cast<char*>(MappedMemory); + strcpy(Memory, "hello there"); + + // Unmap temp file + ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096)); + MappedMemory = NULL; + Memory = NULL; + + // Map it back in read-only + ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, + false /*read-only*/, MappedMemory)); + + // Verify content + Memory = reinterpret_cast<char*>(MappedMemory); + bool SAME = (strcmp(Memory, "hello there") == 0); + EXPECT_TRUE(SAME); + + // Unmap temp file + ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096)); + MappedMemory = NULL; + Memory = NULL; +} +#endif + + } // anonymous namespace diff --git a/unittests/Transforms/CMakeLists.txt b/unittests/Transforms/CMakeLists.txt new file mode 100644 index 0000000000..e3ce185e0d --- /dev/null +++ b/unittests/Transforms/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Utils) diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt new file mode 100644 index 0000000000..365bfbb0bf --- /dev/null +++ b/unittests/Transforms/Utils/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LLVM_LINK_COMPONENTS + TransformUtils + ) + +add_llvm_unittest(UtilsTests + Cloning.cpp + Local.cpp + ) diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp index 4243b2d39d..ea3d5bee78 100644 --- a/unittests/Transforms/Utils/Cloning.cpp +++ b/unittests/Transforms/Utils/Cloning.cpp @@ -18,6 +18,7 @@ using namespace llvm; namespace { + class CloneInstruction : public ::testing::Test { protected: virtual void SetUp() { @@ -48,7 
+49,6 @@ protected: LLVMContext context; Value *V; }; -} TEST_F(CloneInstruction, OverflowBits) { V = new Argument(Type::getInt32Ty(context)); @@ -142,3 +142,5 @@ TEST_F(CloneInstruction, Exact) { SDiv->setIsExact(true); EXPECT_TRUE(this->clone(SDiv)->isExact()); } + +} diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp index 3026b4bc34..727f5ea525 100644 --- a/unittests/Transforms/Utils/Local.cpp +++ b/unittests/Transforms/Utils/Local.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" #include "llvm/BasicBlock.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Transforms/Utils/Local.h" +#include "gtest/gtest.h" + using namespace llvm; TEST(Local, RecursivelyDeleteDeadPHINodes) { diff --git a/unittests/Transforms/Utils/Makefile b/unittests/Transforms/Utils/Makefile index fdf4be0e0e..e6c2a2c133 100644 --- a/unittests/Transforms/Utils/Makefile +++ b/unittests/Transforms/Utils/Makefile @@ -9,7 +9,7 @@ LEVEL = ../../.. TESTNAME = Utils -LINK_COMPONENTS := core support transformutils +LINK_COMPONENTS := TransformUtils include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest diff --git a/unittests/VMCore/CMakeLists.txt b/unittests/VMCore/CMakeLists.txt new file mode 100644 index 0000000000..5a87605b27 --- /dev/null +++ b/unittests/VMCore/CMakeLists.txt @@ -0,0 +1,31 @@ +set(LLVM_LINK_COMPONENTS + asmparser + core + ipa + ) + +set(VMCoreSources + ConstantsTest.cpp + DominatorTreeTest.cpp + InstructionsTest.cpp + MetadataTest.cpp + PassManagerTest.cpp + ValueMapTest.cpp + VerifierTest.cpp + ) + +# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug. +# See issue#331418 in Visual Studio. 
+if(MSVC AND MSVC_VERSION LESS 1600) + list(REMOVE_ITEM VMCoreSources ValueMapTest.cpp) +endif() + +# HACK: Declare a couple of source files as optionally compiled to satisfy the +# missing-file-checker in LLVM's weird CMake build. +set(LLVM_OPTIONAL_SOURCES + ValueMapTest.cpp + ) + +add_llvm_unittest(VMCoreTests + ${VMCoreSources} + ) diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp index d002101cd3..a8902d932e 100644 --- a/unittests/VMCore/InstructionsTest.cpp +++ b/unittests/VMCore/InstructionsTest.cpp @@ -7,16 +7,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Instructions.h" #include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/MDBuilder.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Target/TargetData.h" #include "gtest/gtest.h" diff --git a/unittests/VMCore/Makefile b/unittests/VMCore/Makefile index df55065e19..d743dc5d5b 100644 --- a/unittests/VMCore/Makefile +++ b/unittests/VMCore/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. TESTNAME = VMCore -LINK_COMPONENTS := core support target ipa asmparser +LINK_COMPONENTS := core ipa asmparser include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index e980b1a7d9..f5e094e486 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -186,6 +186,8 @@ struct ClassInfo { /// For register classes, the records for all the registers in this class. std::set<Record*> Registers; + /// For custom match classes, he diagnostic kind for when the predicate fails. 
+ std::string DiagnosticType; public: /// isRegisterClass() - Check if this is a register class. bool isRegisterClass() const { @@ -593,6 +595,9 @@ public: /// Map of Predicate records to their subtarget information. std::map<Record*, SubtargetFeatureInfo*> SubtargetFeatures; + /// Map of AsmOperandClass records to their class information. + std::map<Record*, ClassInfo*> AsmOperandClasses; + private: /// Map of token to class information which has already been constructed. std::map<std::string, ClassInfo*> TokenClasses; @@ -600,9 +605,6 @@ private: /// Map of RegisterClass records to their class information. std::map<Record*, ClassInfo*> RegisterClassClasses; - /// Map of AsmOperandClass records to their class information. - std::map<Record*, ClassInfo*> AsmOperandClasses; - private: /// getTokenClass - Lookup or create the class for the given token. ClassInfo *getTokenClass(StringRef Token); @@ -960,6 +962,7 @@ ClassInfo *AsmMatcherInfo::getTokenClass(StringRef Token) { Entry->PredicateMethod = "<invalid>"; Entry->RenderMethod = "<invalid>"; Entry->ParserMethod = ""; + Entry->DiagnosticType = ""; Classes.push_back(Entry); } @@ -1085,6 +1088,8 @@ buildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) { CI->PredicateMethod = ""; // unused CI->RenderMethod = "addRegOperands"; CI->Registers = *it; + // FIXME: diagnostic type. + CI->DiagnosticType = ""; Classes.push_back(CI); RegisterSetClasses.insert(std::make_pair(*it, CI)); } @@ -1200,6 +1205,12 @@ void AsmMatcherInfo::buildOperandClasses() { if (StringInit *SI = dynamic_cast<StringInit*>(PRMName)) CI->ParserMethod = SI->getValue(); + // Get the diagnostic type or leave it as empty. + // Get the parse method name or leave it as empty. 
+ Init *DiagnosticType = (*it)->getValueInit("DiagnosticType"); + if (StringInit *SI = dynamic_cast<StringInit*>(DiagnosticType)) + CI->DiagnosticType = SI->getValue(); + AsmOperandClasses[*it] = CI; Classes.push_back(CI); } @@ -1802,19 +1813,21 @@ static void emitMatchClassEnumeration(CodeGenTarget &Target, /// emitValidateOperandClass - Emit the function to validate an operand class. static void emitValidateOperandClass(AsmMatcherInfo &Info, raw_ostream &OS) { - OS << "static bool validateOperandClass(MCParsedAsmOperand *GOp, " + OS << "static unsigned validateOperandClass(MCParsedAsmOperand *GOp, " << "MatchClassKind Kind) {\n"; OS << " " << Info.Target.getName() << "Operand &Operand = *(" << Info.Target.getName() << "Operand*)GOp;\n"; // The InvalidMatchClass is not to match any operand. OS << " if (Kind == InvalidMatchClass)\n"; - OS << " return false;\n\n"; + OS << " return MCTargetAsmParser::Match_InvalidOperand;\n\n"; // Check for Token operands first. + // FIXME: Use a more specific diagnostic type. OS << " if (Operand.isToken())\n"; - OS << " return isSubclass(matchTokenString(Operand.getToken()), Kind);" - << "\n\n"; + OS << " return isSubclass(matchTokenString(Operand.getToken()), Kind) ?\n" + << " MCTargetAsmParser::Match_Success :\n" + << " MCTargetAsmParser::Match_InvalidOperand;\n\n"; // Check for register operands, including sub-classes. OS << " if (Operand.isReg()) {\n"; @@ -1828,8 +1841,9 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info, << it->first->getName() << ": OpKind = " << it->second->Name << "; break;\n"; OS << " }\n"; - OS << " return isSubclass(OpKind, Kind);\n"; - OS << " }\n\n"; + OS << " return isSubclass(OpKind, Kind) ? " + << "MCTargetAsmParser::Match_Success :\n " + << " MCTargetAsmParser::Match_InvalidOperand;\n }\n\n"; // Check the user classes. We don't care what order since we're only // actually matching against one of them. 
@@ -1841,13 +1855,18 @@ static void emitValidateOperandClass(AsmMatcherInfo &Info, continue; OS << " // '" << CI.ClassName << "' class\n"; - OS << " if (Kind == " << CI.Name - << " && Operand." << CI.PredicateMethod << "()) {\n"; - OS << " return true;\n"; + OS << " if (Kind == " << CI.Name << ") {\n"; + OS << " if (Operand." << CI.PredicateMethod << "())\n"; + OS << " return MCTargetAsmParser::Match_Success;\n"; + if (!CI.DiagnosticType.empty()) + OS << " return " << Info.Target.getName() << "AsmParser::Match_" + << CI.DiagnosticType << ";\n"; OS << " }\n\n"; } - OS << " return false;\n"; + // Generic fallthrough match failure case for operands that don't have + // specialized diagnostic types. + OS << " return MCTargetAsmParser::Match_InvalidOperand;\n"; OS << "}\n\n"; } @@ -1963,6 +1982,26 @@ static void emitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info, OS << "};\n\n"; } +/// emitOperandDiagnosticTypes - Emit the operand matching diagnostic types. +static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) { + // Get the set of diagnostic types from all of the operand classes. + std::set<StringRef> Types; + for (std::map<Record*, ClassInfo*>::const_iterator + I = Info.AsmOperandClasses.begin(), + E = Info.AsmOperandClasses.end(); I != E; ++I) { + if (!I->second->DiagnosticType.empty()) + Types.insert(I->second->DiagnosticType); + } + + if (Types.empty()) return; + + // Now emit the enum entries. + for (std::set<StringRef>::const_iterator I = Types.begin(), E = Types.end(); + I != E; ++I) + OS << " Match_" << *I << ",\n"; + OS << " END_OPERAND_DIAGNOSTIC_TYPES\n"; +} + /// emitGetSubtargetFeatureName - Emit the helper function to get the /// user-level name for a subtarget feature. static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) { @@ -2394,6 +2433,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << "#endif // GET_ASSEMBLER_HEADER_INFO\n\n"; + // Emit the operand match diagnostic enum names. 
+ OS << "\n#ifdef GET_OPERAND_DIAGNOSTIC_TYPES\n"; + OS << "#undef GET_OPERAND_DIAGNOSTIC_TYPES\n\n"; + emitOperandDiagnosticTypes(Info, OS); + OS << "#endif // GET_OPERAND_DIAGNOSTIC_TYPES\n\n"; + + OS << "\n#ifdef GET_REGISTER_MATCHER\n"; OS << "#undef GET_REGISTER_MATCHER\n\n"; @@ -2575,6 +2621,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " bool HadMatchOtherThanFeatures = false;\n"; OS << " bool HadMatchOtherThanPredicate = false;\n"; OS << " unsigned RetCode = Match_InvalidOperand;\n"; + OS << " unsigned MissingFeatures = ~0U;\n"; OS << " // Set ErrorInfo to the operand that mismatches if it is\n"; OS << " // wrong for all instances of the instruction.\n"; OS << " ErrorInfo = ~0U;\n"; @@ -2604,13 +2651,22 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n"; OS << " break;\n"; OS << " }\n"; - OS << " if (validateOperandClass(Operands[i+1], " - "(MatchClassKind)it->Classes[i]))\n"; + OS << " unsigned Diag = validateOperandClass(Operands[i+1],\n"; + OS.indent(43); + OS << "(MatchClassKind)it->Classes[i]);\n"; + OS << " if (Diag == Match_Success)\n"; OS << " continue;\n"; OS << " // If this operand is broken for all of the instances of this\n"; OS << " // mnemonic, keep track of it so we can report loc info.\n"; - OS << " if (it == MnemonicRange.first || ErrorInfo <= i+1)\n"; + OS << " // If we already had a match that only failed due to a\n"; + OS << " // target predicate, that diagnostic is preferred.\n"; + OS << " if (!HadMatchOtherThanPredicate &&\n"; + OS << " (it == MnemonicRange.first || ErrorInfo <= i+1)) {\n"; OS << " ErrorInfo = i+1;\n"; + OS << " // InvalidOperand is the default. 
Prefer specificity.\n"; + OS << " if (Diag != Match_InvalidOperand)\n"; + OS << " RetCode = Diag;\n"; + OS << " }\n"; OS << " // Otherwise, just reject this instance of the mnemonic.\n"; OS << " OperandsValid = false;\n"; OS << " break;\n"; @@ -2622,7 +2678,11 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " if ((AvailableFeatures & it->RequiredFeatures) " << "!= it->RequiredFeatures) {\n"; OS << " HadMatchOtherThanFeatures = true;\n"; - OS << " ErrorInfo = it->RequiredFeatures & ~AvailableFeatures;\n"; + OS << " unsigned NewMissingFeatures = it->RequiredFeatures & " + "~AvailableFeatures;\n"; + OS << " if (CountPopulation_32(NewMissingFeatures) <= " + "CountPopulation_32(MissingFeatures))\n"; + OS << " MissingFeatures = NewMissingFeatures;\n"; OS << " continue;\n"; OS << " }\n"; OS << "\n"; @@ -2656,8 +2716,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " // Okay, we had no match. Try to return a useful error code.\n"; OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)"; - OS << " return RetCode;\n"; - OS << " assert(ErrorInfo && \"missing feature(s) but what?!\");"; + OS << " return RetCode;\n"; + OS << " // Missing feature matches return which features were missing\n"; + OS << " ErrorInfo = MissingFeatures;\n"; OS << " return Match_MissingFeature;\n"; OS << "}\n\n"; diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 2ac7b87e70..aed222c094 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -690,6 +690,13 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, bool NodeHasChain = InstPatNode && InstPatNode->TreeHasProperty(SDNPHasChain, CGP); + // Instructions which load and store from memory should have a chain, + // regardless of whether they happen to have an internal pattern saying so. 
+ if (Pattern.getSrcPattern()->TreeHasProperty(SDNPHasChain, CGP) + && (II.hasCtrlDep || II.mayLoad || II.mayStore || II.canFoldAsLoad || + II.hasSideEffects)) + NodeHasChain = true; + bool isRoot = N == Pattern.getDstPattern(); // TreeHasOutGlue - True if this tree has glue. diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp index 26ab76390e..8bfecead6d 100644 --- a/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/utils/TableGen/DFAPacketizerEmitter.cpp @@ -94,7 +94,12 @@ class State { // PossibleStates is the set of valid resource states that ensue from valid // transitions. // - bool canAddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates); + bool canAddInsnClass(unsigned InsnClass) const; + // + // AddInsnClass - Return all combinations of resource reservation + // which are possible from this state (PossibleStates). + // + void AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates); }; } // End anonymous namespace. @@ -120,6 +125,10 @@ namespace { struct ltState { bool operator()(const State *s1, const State *s2) const; }; + +struct ltTransition { + bool operator()(const Transition *s1, const Transition *s2) const; +}; } // End anonymous namespace. @@ -135,7 +144,8 @@ public: std::set<State*, ltState> states; // Map from a state to the list of transitions with that state as source. - std::map<State*, SmallVector<Transition*, 16>, ltState> stateTransitions; + std::map<State*, std::set<Transition*, ltTransition>, ltState> + stateTransitions; State *currentState; // Highest valued Input seen. 
@@ -193,21 +203,19 @@ bool ltState::operator()(const State *s1, const State *s2) const { return (s1->stateNum < s2->stateNum); } +bool ltTransition::operator()(const Transition *s1, const Transition *s2) const { + return (s1->input < s2->input); +} // -// canAddInsnClass - Returns true if an instruction of type InsnClass is a -// valid transition from this state i.e., can an instruction of type InsnClass -// be added to the packet represented by this state. +// AddInsnClass - Return all combinations of resource reservation +// which are possible from this state (PossibleStates). // -// PossibleStates is the set of valid resource states that ensue from valid -// transitions. -// -bool State::canAddInsnClass(unsigned InsnClass, +void State::AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates) { // // Iterate over all resource states in currentState. // - bool AddedState = false; for (std::set<unsigned>::iterator SI = stateInfo.begin(); SI != stateInfo.end(); ++SI) { @@ -240,13 +248,26 @@ bool State::canAddInsnClass(unsigned InsnClass, (VisitedResourceStates.count(ResultingResourceState) == 0)) { VisitedResourceStates.insert(ResultingResourceState); PossibleStates.insert(ResultingResourceState); - AddedState = true; } } } } - return AddedState; +} + + +// +// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a +// valid transition from this state i.e., can an instruction of type InsnClass +// be added to the packet represented by this state. +// +bool State::canAddInsnClass(unsigned InsnClass) const { + for (std::set<unsigned>::const_iterator SI = stateInfo.begin(); + SI != stateInfo.end(); ++SI) { + if (~*SI & InsnClass) + return true; + } + return false; } @@ -267,7 +288,9 @@ void DFA::addTransition(Transition *T) { LargestInput = T->input; // Add the new transition. 
- stateTransitions[T->from].push_back(T); + bool Added = stateTransitions[T->from].insert(T).second; + assert(Added && "Cannot have multiple states for the same input"); + (void)Added; } @@ -281,11 +304,13 @@ State *DFA::getTransition(State *From, unsigned I) { return NULL; // Do we have a transition from state From with Input I? - for (SmallVector<Transition*, 16>::iterator VI = - stateTransitions[From].begin(); - VI != stateTransitions[From].end(); ++VI) - if ((*VI)->input == I) - return (*VI)->to; + Transition TVal(NULL, I, NULL); + // Do not count this temporal instance + Transition::currentTransitionNum--; + std::set<Transition*, ltTransition>::iterator T = + stateTransitions[From].find(&TVal); + if (T != stateTransitions[From].end()) + return (*T)->to; return NULL; } @@ -331,11 +356,12 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) { StateEntry[i] = ValidTransitions; for (unsigned j = 0; j <= LargestInput; ++j) { assert (((*SI)->stateNum == (int) i) && "Mismatch in state numbers"); - if (!isValidTransition(*SI, j)) + State *To = getTransition(*SI, j); + if (To == NULL) continue; OS << "{" << j << ", " - << getTransition(*SI, j)->stateNum + << To->stateNum << "}, "; ++ValidTransitions; } @@ -514,8 +540,10 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { // and the state can accommodate this InsnClass, create a transition. // if (!D.getTransition(current, InsnClass) && - current->canAddInsnClass(InsnClass, NewStateResources)) { + current->canAddInsnClass(InsnClass)) { State *NewState = NULL; + current->AddInsnClass(InsnClass, NewStateResources); + assert(NewStateResources.size() && "New states must be generated"); // // If we have seen this state before, then do not create a new state. 
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index 6fb2feecbc..7b9354c169 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -317,6 +317,8 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type, MEM("i256mem"); MEM("f128mem"); MEM("f256mem"); + MEM("v128mem"); + MEM("v256mem"); MEM("opaque512mem"); // all R, I, R, I diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 1b871a8b66..8089908c1f 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -422,15 +422,18 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, // Get processor itinerary name const std::string &Name = Proc->getName(); - // Skip default - if (Name == "NoItineraries") continue; - - // Create and expand processor itinerary to cover all itinerary classes - std::vector<InstrItinerary> ItinList; - ItinList.resize(NItinClasses); - // Get itinerary data list std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID"); + std::vector<InstrItinerary> ItinList; + + // Add an empty itinerary. 
+ if (ItinDataList.empty()) { + ProcList.push_back(ItinList); + continue; + } + + // Expand processor itinerary to cover all itinerary classes + ItinList.resize(NItinClasses); // For each itinerary data for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) { @@ -495,7 +498,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS, unsigned Find = ItinClassesMap[Name]; // Set up itinerary as location and location + stage count - unsigned NumUOps = ItinClassList[Find]->getValueAsInt("NumMicroOps"); + int NumUOps = ItinData->getValueAsInt("NumMicroOps"); InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages, FindOperandCycle, FindOperandCycle + NOperandCycles}; @@ -559,8 +562,6 @@ EmitProcessorData(raw_ostream &OS, const std::string &Name = Itin->getName(); // Skip default - if (Name == "NoItineraries") continue; - // Begin processor itinerary properties OS << "\n"; OS << "static const llvm::InstrItineraryProps " << Name << "Props(\n"; @@ -570,42 +571,45 @@ EmitProcessorData(raw_ostream &OS, EmitItineraryProp(OS, Itin, "HighLatency", ' '); OS << ");\n"; - // Begin processor itinerary table - OS << "\n"; - OS << "static const llvm::InstrItinerary " << Name << "Entries" - << "[] = {\n"; - // For each itinerary class std::vector<InstrItinerary> &ItinList = *ProcListIter++; - assert(ItinList.size() == ItinClassList.size() && "bad itinerary"); - for (unsigned j = 0, M = ItinList.size(); j < M; ++j) { - InstrItinerary &Intinerary = ItinList[j]; - - // Emit in the form of - // { firstStage, lastStage, firstCycle, lastCycle } // index - if (Intinerary.FirstStage == 0) { - OS << " { 1, 0, 0, 0, 0 }"; - } else { - OS << " { " << - Intinerary.NumMicroOps << ", " << - Intinerary.FirstStage << ", " << - Intinerary.LastStage << ", " << - Intinerary.FirstOperandCycle << ", " << - Intinerary.LastOperandCycle << " }"; - } + if (!ItinList.empty()) { + assert(ItinList.size() == ItinClassList.size() && "bad itinerary"); - OS << ", // " << j << " " << 
ItinClassList[j]->getName() << "\n"; + // Begin processor itinerary table + OS << "\n"; + OS << "static const llvm::InstrItinerary " << Name << "Entries" + << "[] = {\n"; + + for (unsigned j = 0, M = ItinList.size(); j < M; ++j) { + InstrItinerary &Intinerary = ItinList[j]; + + // Emit in the form of + // { firstStage, lastStage, firstCycle, lastCycle } // index + if (Intinerary.FirstStage == 0) { + OS << " { 1, 0, 0, 0, 0 }"; + } else { + OS << " { " << + Intinerary.NumMicroOps << ", " << + Intinerary.FirstStage << ", " << + Intinerary.LastStage << ", " << + Intinerary.FirstOperandCycle << ", " << + Intinerary.LastOperandCycle << " }"; + } + OS << ", // " << j << " " << ItinClassList[j]->getName() << "\n"; + } + // End processor itinerary table + OS << " { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n"; + OS << "};\n"; } - - // End processor itinerary table - OS << " { 1, ~0U, ~0U, ~0U, ~0U } // end marker\n"; - OS << "};\n"; - OS << '\n'; OS << "static const llvm::InstrItinerarySubtargetValue " << Name << " = {\n"; OS << " &" << Name << "Props,\n"; - OS << " " << Name << "Entries\n"; + if (ItinList.empty()) + OS << " 0\n"; + else + OS << " " << Name << "Entries\n"; OS << "};\n"; } } diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index afb25be7ff..0ab21c8a54 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -1106,6 +1106,8 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, TYPE("VR128", TYPE_XMM128) TYPE("f128mem", TYPE_M128) TYPE("f256mem", TYPE_M256) + TYPE("v128mem", TYPE_M128) + TYPE("v256mem", TYPE_M256) TYPE("FR64", TYPE_XMM64) TYPE("f64mem", TYPE_M64FP) TYPE("sdmem", TYPE_M64FP) @@ -1235,6 +1237,8 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString ENCODING("sdmem", ENCODING_RM) ENCODING("f128mem", ENCODING_RM) ENCODING("f256mem", ENCODING_RM) + ENCODING("v128mem", ENCODING_RM) + ENCODING("v256mem", ENCODING_RM) ENCODING("f64mem", 
ENCODING_RM) ENCODING("f32mem", ENCODING_RM) ENCODING("i128mem", ENCODING_RM) diff --git a/utils/obj2yaml/CMakeLists.txt b/utils/obj2yaml/CMakeLists.txt new file mode 100644 index 0000000000..d64bf1bad8 --- /dev/null +++ b/utils/obj2yaml/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS archive object) + +add_llvm_utility(obj2yaml + obj2yaml.cpp coff2yaml.cpp + ) + +target_link_libraries(obj2yaml LLVMSupport) diff --git a/utils/obj2yaml/Makefile b/utils/obj2yaml/Makefile new file mode 100644 index 0000000000..5b96bdd5b9 --- /dev/null +++ b/utils/obj2yaml/Makefile @@ -0,0 +1,20 @@ +##===- utils/obj2yaml/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = obj2yaml +USEDLIBS = LLVMObject.a LLVMSupport.a + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +# Don't install this utility +NO_INSTALL = 1 + +include $(LEVEL)/Makefile.common diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp new file mode 100644 index 0000000000..2dbd53117a --- /dev/null +++ b/utils/obj2yaml/coff2yaml.cpp @@ -0,0 +1,361 @@ +//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "obj2yaml.h" + +#include "llvm/Object/COFF.h" + + +template <typename One, typename Two> +struct pod_pair { // I'd much rather use std::pair, but it's not a POD + One first; + Two second; +}; + +#define STRING_PAIR(x) {llvm::COFF::x, #x} +static const pod_pair<llvm::COFF::MachineTypes, const char *> +MachineTypePairs [] = { + STRING_PAIR(IMAGE_FILE_MACHINE_UNKNOWN), + STRING_PAIR(IMAGE_FILE_MACHINE_AM33), + STRING_PAIR(IMAGE_FILE_MACHINE_AMD64), + STRING_PAIR(IMAGE_FILE_MACHINE_ARM), + STRING_PAIR(IMAGE_FILE_MACHINE_ARMV7), + STRING_PAIR(IMAGE_FILE_MACHINE_EBC), + STRING_PAIR(IMAGE_FILE_MACHINE_I386), + STRING_PAIR(IMAGE_FILE_MACHINE_IA64), + STRING_PAIR(IMAGE_FILE_MACHINE_M32R), + STRING_PAIR(IMAGE_FILE_MACHINE_MIPS16), + STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU), + STRING_PAIR(IMAGE_FILE_MACHINE_MIPSFPU16), + STRING_PAIR(IMAGE_FILE_MACHINE_POWERPC), + STRING_PAIR(IMAGE_FILE_MACHINE_POWERPCFP), + STRING_PAIR(IMAGE_FILE_MACHINE_R4000), + STRING_PAIR(IMAGE_FILE_MACHINE_SH3), + STRING_PAIR(IMAGE_FILE_MACHINE_SH3DSP), + STRING_PAIR(IMAGE_FILE_MACHINE_SH4), + STRING_PAIR(IMAGE_FILE_MACHINE_SH5), + STRING_PAIR(IMAGE_FILE_MACHINE_THUMB), + STRING_PAIR(IMAGE_FILE_MACHINE_WCEMIPSV2) +}; + +static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> +SectionCharacteristicsPairs1 [] = { + STRING_PAIR(IMAGE_SCN_TYPE_NO_PAD), + STRING_PAIR(IMAGE_SCN_CNT_CODE), + STRING_PAIR(IMAGE_SCN_CNT_INITIALIZED_DATA), + STRING_PAIR(IMAGE_SCN_CNT_UNINITIALIZED_DATA), + STRING_PAIR(IMAGE_SCN_LNK_OTHER), + STRING_PAIR(IMAGE_SCN_LNK_INFO), + STRING_PAIR(IMAGE_SCN_LNK_REMOVE), + STRING_PAIR(IMAGE_SCN_LNK_COMDAT), + STRING_PAIR(IMAGE_SCN_GPREL), + STRING_PAIR(IMAGE_SCN_MEM_PURGEABLE), + STRING_PAIR(IMAGE_SCN_MEM_16BIT), + STRING_PAIR(IMAGE_SCN_MEM_LOCKED), + STRING_PAIR(IMAGE_SCN_MEM_PRELOAD) +}; + +static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+SectionCharacteristicsPairsAlignment [] = { + STRING_PAIR(IMAGE_SCN_ALIGN_1BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_2BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_4BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_8BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_16BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_32BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_64BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_128BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_256BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_512BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_1024BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_2048BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_4096BYTES), + STRING_PAIR(IMAGE_SCN_ALIGN_8192BYTES) +}; + +static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> +SectionCharacteristicsPairs2 [] = { + STRING_PAIR(IMAGE_SCN_LNK_NRELOC_OVFL), + STRING_PAIR(IMAGE_SCN_MEM_DISCARDABLE), + STRING_PAIR(IMAGE_SCN_MEM_NOT_CACHED), + STRING_PAIR(IMAGE_SCN_MEM_NOT_PAGED), + STRING_PAIR(IMAGE_SCN_MEM_SHARED), + STRING_PAIR(IMAGE_SCN_MEM_EXECUTE), + STRING_PAIR(IMAGE_SCN_MEM_READ), + STRING_PAIR(IMAGE_SCN_MEM_WRITE) +}; + +static const pod_pair<llvm::COFF::SymbolBaseType, const char *> +SymbolBaseTypePairs [] = { + STRING_PAIR(IMAGE_SYM_TYPE_NULL), + STRING_PAIR(IMAGE_SYM_TYPE_VOID), + STRING_PAIR(IMAGE_SYM_TYPE_CHAR), + STRING_PAIR(IMAGE_SYM_TYPE_SHORT), + STRING_PAIR(IMAGE_SYM_TYPE_INT), + STRING_PAIR(IMAGE_SYM_TYPE_LONG), + STRING_PAIR(IMAGE_SYM_TYPE_FLOAT), + STRING_PAIR(IMAGE_SYM_TYPE_DOUBLE), + STRING_PAIR(IMAGE_SYM_TYPE_STRUCT), + STRING_PAIR(IMAGE_SYM_TYPE_UNION), + STRING_PAIR(IMAGE_SYM_TYPE_ENUM), + STRING_PAIR(IMAGE_SYM_TYPE_MOE), + STRING_PAIR(IMAGE_SYM_TYPE_BYTE), + STRING_PAIR(IMAGE_SYM_TYPE_WORD), + STRING_PAIR(IMAGE_SYM_TYPE_UINT), + STRING_PAIR(IMAGE_SYM_TYPE_DWORD) +}; + +static const pod_pair<llvm::COFF::SymbolComplexType, const char *> +SymbolComplexTypePairs [] = { + STRING_PAIR(IMAGE_SYM_DTYPE_NULL), + STRING_PAIR(IMAGE_SYM_DTYPE_POINTER), + STRING_PAIR(IMAGE_SYM_DTYPE_FUNCTION), + STRING_PAIR(IMAGE_SYM_DTYPE_ARRAY), +}; + +static const 
pod_pair<llvm::COFF::SymbolStorageClass, const char *> +SymbolStorageClassPairs [] = { + STRING_PAIR(IMAGE_SYM_CLASS_END_OF_FUNCTION), + STRING_PAIR(IMAGE_SYM_CLASS_NULL), + STRING_PAIR(IMAGE_SYM_CLASS_AUTOMATIC), + STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL), + STRING_PAIR(IMAGE_SYM_CLASS_STATIC), + STRING_PAIR(IMAGE_SYM_CLASS_REGISTER), + STRING_PAIR(IMAGE_SYM_CLASS_EXTERNAL_DEF), + STRING_PAIR(IMAGE_SYM_CLASS_LABEL), + STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_LABEL), + STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT), + STRING_PAIR(IMAGE_SYM_CLASS_ARGUMENT), + STRING_PAIR(IMAGE_SYM_CLASS_STRUCT_TAG), + STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_UNION), + STRING_PAIR(IMAGE_SYM_CLASS_UNION_TAG), + STRING_PAIR(IMAGE_SYM_CLASS_TYPE_DEFINITION), + STRING_PAIR(IMAGE_SYM_CLASS_UNDEFINED_STATIC), + STRING_PAIR(IMAGE_SYM_CLASS_ENUM_TAG), + STRING_PAIR(IMAGE_SYM_CLASS_MEMBER_OF_ENUM), + STRING_PAIR(IMAGE_SYM_CLASS_REGISTER_PARAM), + STRING_PAIR(IMAGE_SYM_CLASS_BIT_FIELD), + STRING_PAIR(IMAGE_SYM_CLASS_BLOCK), + STRING_PAIR(IMAGE_SYM_CLASS_FUNCTION), + STRING_PAIR(IMAGE_SYM_CLASS_END_OF_STRUCT), + STRING_PAIR(IMAGE_SYM_CLASS_FILE), + STRING_PAIR(IMAGE_SYM_CLASS_SECTION), + STRING_PAIR(IMAGE_SYM_CLASS_WEAK_EXTERNAL), + STRING_PAIR(IMAGE_SYM_CLASS_CLR_TOKEN), +}; + +static const pod_pair<llvm::COFF::RelocationTypeX86, const char *> +RelocationTypeX86Pairs [] = { + STRING_PAIR(IMAGE_REL_I386_ABSOLUTE), + STRING_PAIR(IMAGE_REL_I386_DIR16), + STRING_PAIR(IMAGE_REL_I386_REL16), + STRING_PAIR(IMAGE_REL_I386_DIR32), + STRING_PAIR(IMAGE_REL_I386_DIR32NB), + STRING_PAIR(IMAGE_REL_I386_SEG12), + STRING_PAIR(IMAGE_REL_I386_SECTION), + STRING_PAIR(IMAGE_REL_I386_SECREL), + STRING_PAIR(IMAGE_REL_I386_TOKEN), + STRING_PAIR(IMAGE_REL_I386_SECREL7), + STRING_PAIR(IMAGE_REL_I386_REL32), + STRING_PAIR(IMAGE_REL_AMD64_ABSOLUTE), + STRING_PAIR(IMAGE_REL_AMD64_ADDR64), + STRING_PAIR(IMAGE_REL_AMD64_ADDR32), + STRING_PAIR(IMAGE_REL_AMD64_ADDR32NB), + STRING_PAIR(IMAGE_REL_AMD64_REL32), + 
STRING_PAIR(IMAGE_REL_AMD64_REL32_1), + STRING_PAIR(IMAGE_REL_AMD64_REL32_2), + STRING_PAIR(IMAGE_REL_AMD64_REL32_3), + STRING_PAIR(IMAGE_REL_AMD64_REL32_4), + STRING_PAIR(IMAGE_REL_AMD64_REL32_5), + STRING_PAIR(IMAGE_REL_AMD64_SECTION), + STRING_PAIR(IMAGE_REL_AMD64_SECREL), + STRING_PAIR(IMAGE_REL_AMD64_SECREL7), + STRING_PAIR(IMAGE_REL_AMD64_TOKEN), + STRING_PAIR(IMAGE_REL_AMD64_SREL32), + STRING_PAIR(IMAGE_REL_AMD64_PAIR), + STRING_PAIR(IMAGE_REL_AMD64_SSPAN32) +}; + +static const pod_pair<llvm::COFF::RelocationTypesARM, const char *> +RelocationTypesARMPairs [] = { + STRING_PAIR(IMAGE_REL_ARM_ABSOLUTE), + STRING_PAIR(IMAGE_REL_ARM_ADDR32), + STRING_PAIR(IMAGE_REL_ARM_ADDR32NB), + STRING_PAIR(IMAGE_REL_ARM_BRANCH24), + STRING_PAIR(IMAGE_REL_ARM_BRANCH11), + STRING_PAIR(IMAGE_REL_ARM_TOKEN), + STRING_PAIR(IMAGE_REL_ARM_BLX24), + STRING_PAIR(IMAGE_REL_ARM_BLX11), + STRING_PAIR(IMAGE_REL_ARM_SECTION), + STRING_PAIR(IMAGE_REL_ARM_SECREL), + STRING_PAIR(IMAGE_REL_ARM_MOV32A), + STRING_PAIR(IMAGE_REL_ARM_MOV32T), + STRING_PAIR(IMAGE_REL_ARM_BRANCH20T), + STRING_PAIR(IMAGE_REL_ARM_BRANCH24T), + STRING_PAIR(IMAGE_REL_ARM_BLX23T) +}; +#undef STRING_PAIR + + +static const char endl = '\n'; + +namespace yaml { // COFF-specific yaml-writing specific routines + +static llvm::raw_ostream &writeName(llvm::raw_ostream &Out, + const char *Name, std::size_t NameSize) { + for (std::size_t i = 0; i < NameSize; ++i) { + if (!Name[i]) break; + Out << Name[i]; + } + return Out; +} + +// Given an array of pod_pair<enum, const char *>, write all enums that match +template <typename T, std::size_t N> +static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out, + const pod_pair<T, const char *> (&Arr)[N], unsigned long Val) { + for (std::size_t i = 0; i < N; ++i) + if (Val & Arr[i].first) + Out << Arr[i].second << ", "; + return Out; +} + +} // end of yaml namespace + +// Given an array of pod_pair<enum, const char *>, look up a value +template <typename T, std::size_t N> +const char 
*nameLookup(const pod_pair<T, const char *> (&Arr)[N], + unsigned long Val, const char *NotFound = NULL) { + T n = static_cast<T>(Val); + for (std::size_t i = 0; i < N; ++i) + if (n == Arr[i].first) + return Arr[i].second; + return NotFound; +} + + +static llvm::raw_ostream &yamlCOFFHeader( + const llvm::object::coff_file_header *Header,llvm::raw_ostream &Out) { + + Out << "header: !Header" << endl; + Out << " Machine: "; + Out << nameLookup(MachineTypePairs, Header->Machine, "# Unknown_MachineTypes") + << " # ("; + return yaml::writeHexNumber(Out, Header->Machine) << ")" << endl << endl; +} + + +static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj, + std::size_t NumSections, llvm::raw_ostream &Out) { + llvm::error_code ec; + Out << "sections:" << endl; + for (llvm::object::section_iterator iter = Obj.begin_sections(); + iter != Obj.end_sections(); iter.increment(ec)) { + const llvm::object::coff_section *sect = Obj.getCOFFSection(iter); + + Out << " - !Section" << endl; + Out << " Name: "; + yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << endl; + + Out << " Characteristics: ["; + yaml::writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics); + Out << nameLookup(SectionCharacteristicsPairsAlignment, + sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN") + << ", "; + yaml::writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics); + Out << "] # "; + yaml::writeHexNumber(Out, sect->Characteristics) << endl; + + llvm::ArrayRef<uint8_t> sectionData; + Obj.getSectionContents(sect, sectionData); + Out << " SectionData: "; + yaml::writeHexStream(Out, sectionData) << endl; + + for (llvm::object::relocation_iterator rIter = iter->begin_relocations(); + rIter != iter->end_relocations(); rIter.increment(ec)) { + const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter); + + Out << " - !Relocation" << endl; + Out << " VirtualAddress: " ; + yaml::writeHexNumber(Out, 
reloc->VirtualAddress) << endl; + Out << " SymbolTableIndex: " << reloc->SymbolTableIndex << endl; + Out << " Type: " + << nameLookup(RelocationTypeX86Pairs, reloc->Type) << endl; + // TODO: Use the correct reloc type for the machine. + Out << endl; + } + + } + return Out; +} + +static llvm::raw_ostream& yamlCOFFSymbols(llvm::object::COFFObjectFile &Obj, + std::size_t NumSymbols, llvm::raw_ostream &Out) { + llvm::error_code ec; + Out << "symbols:" << endl; + for (llvm::object::symbol_iterator iter = Obj.begin_symbols(); + iter != Obj.end_symbols(); iter.increment(ec)) { + // Gather all the info that we need + llvm::StringRef str; + const llvm::object::coff_symbol *symbol = Obj.getCOFFSymbol(iter); + Obj.getSymbolName(symbol, str); + std::size_t simpleType = symbol->getBaseType(); + std::size_t complexType = symbol->getComplexType(); + std::size_t storageClass = symbol->StorageClass; + + Out << " - !Symbol" << endl; + Out << " Name: " << str << endl; + + Out << " Value: " << symbol->Value << endl; + Out << " SectionNumber: " << symbol->SectionNumber << endl; + + Out << " SimpleType: " + << nameLookup(SymbolBaseTypePairs, simpleType, + "# Unknown_SymbolBaseType") + << " # (" << simpleType << ")" << endl; + + Out << " ComplexType: " + << nameLookup(SymbolComplexTypePairs, complexType, + "# Unknown_SymbolComplexType") + << " # (" << complexType << ")" << endl; + + Out << " StorageClass: " + << nameLookup(SymbolStorageClassPairs, storageClass, + "# Unknown_StorageClass") + << " # (" << (int) storageClass << ")" << endl; + + if (symbol->NumberOfAuxSymbols > 0) { + llvm::ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol); + Out << " NumberOfAuxSymbols: " + << (int) symbol->NumberOfAuxSymbols << endl; + Out << " AuxillaryData: "; + yaml::writeHexStream(Out, aux); + } + + Out << endl; + } + + return Out; +} + + +llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj) { + llvm::error_code ec; + llvm::object::COFFObjectFile obj(TheObj, ec); + if (!ec) 
{ + const llvm::object::coff_file_header *hd; + ec = obj.getHeader(hd); + if (!ec) { + yamlCOFFHeader(hd, Out); + yamlCOFFSections(obj, hd->NumberOfSections, Out); + yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out); + } + } + return ec; +} diff --git a/utils/obj2yaml/obj2yaml.cpp b/utils/obj2yaml/obj2yaml.cpp new file mode 100644 index 0000000000..ff253fa131 --- /dev/null +++ b/utils/obj2yaml/obj2yaml.cpp @@ -0,0 +1,89 @@ +//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "obj2yaml.h" + +#include "llvm/ADT/OwningPtr.h" + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" + +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" + +const char endl = '\n'; + +namespace yaml { // generic yaml-writing specific routines + +unsigned char printable(unsigned char Ch) { + return Ch >= ' ' && Ch <= '~' ? 
Ch : '.'; +} + +llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out, + const llvm::ArrayRef<uint8_t> arr) { + const char *hex = "0123456789ABCDEF"; + Out << " !hex \""; + + typedef llvm::ArrayRef<uint8_t>::const_iterator iter_t; + const iter_t end = arr.end(); + for (iter_t iter = arr.begin(); iter != end; ++iter) + Out << hex[(*iter >> 4) & 0x0F] << hex[(*iter & 0x0F)]; + + Out << "\" # |"; + for (iter_t iter = arr.begin(); iter != end; ++iter) + Out << printable(*iter); + Out << "|" << endl; + + return Out; + } + +llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out, unsigned long long N) { + if (N >= 10) + Out << "0x"; + Out.write_hex(N); + return Out; +} + +} + + +using namespace llvm; +enum ObjectFileType { coff }; + +cl::opt<ObjectFileType> InputFormat( + cl::desc("Choose input format"), + cl::values( + clEnumVal(coff, "process COFF object files"), + clEnumValEnd)); + +cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); + +int main(int argc, char * argv[]) { + cl::ParseCommandLineOptions(argc, argv); + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
+ +// Process the input file + OwningPtr<MemoryBuffer> buf; + +// TODO: If this is an archive, then burst it and dump each entry + if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf)) + llvm::errs() << "Error: '" << ec.message() << "' opening file '" + << InputFilename << "'" << endl; + else { + ec = coff2yaml(llvm::outs(), buf.take()); + if (ec) + llvm::errs() << "Error: " << ec.message() << " dumping COFF file" << endl; + } + + return 0; +} diff --git a/utils/obj2yaml/obj2yaml.h b/utils/obj2yaml/obj2yaml.h new file mode 100644 index 0000000000..2a23b49682 --- /dev/null +++ b/utils/obj2yaml/obj2yaml.h @@ -0,0 +1,35 @@ +//===------ utils/obj2yaml.hpp - obj2yaml conversion tool -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// This file declares some helper routines, and also the format-specific +// writers. To add a new format, add the declaration here, and, in a separate +// source file, implement it. 
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_UTILS_OBJ2YAML_H +#define LLVM_UTILS_OBJ2YAML_H + +#include "llvm/ADT/ArrayRef.h" + +#include "llvm/Support/system_error.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace yaml { // routines for writing YAML +// Write a hex stream: +// <Prefix> !hex: "<hex digits>" #|<ASCII chars>\n + llvm::raw_ostream &writeHexStream + (llvm::raw_ostream &Out, const llvm::ArrayRef<uint8_t> arr); + +// Writes a number in hex; prefix it by 0x if it is >= 10 + llvm::raw_ostream &writeHexNumber + (llvm::raw_ostream &Out, unsigned long long N); +} + +llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj); + +#endif diff --git a/utils/test_debuginfo.pl b/utils/test_debuginfo.pl index fb61fb0261..a03a117064 100755 --- a/utils/test_debuginfo.pl +++ b/utils/test_debuginfo.pl @@ -24,7 +24,7 @@ my $debugger_script_file = "$output_dir/$input_filename.debugger.script"; my $output_file = "$output_dir/$input_filename.gdb.output"; # Extract debugger commands from testcase. They are marked with DEBUGGER: -# at the beginnign of a comment line. +# at the beginning of a comment line. open(INPUT, $testcase_file); open(OUTPUT, ">$debugger_script_file"); while(<INPUT>) { |