571 files changed, 17688 insertions, 4710 deletions
@@ -116,6 +116,7 @@ cross-compile-build-tools: cd BuildTools ; \ unset CFLAGS ; \ unset CXXFLAGS ; \ + unset SDKROOT ; \ $(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \ --host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE) \ --disable-polly ; \ diff --git a/bindings/ocaml/executionengine/executionengine_ocaml.c b/bindings/ocaml/executionengine/executionengine_ocaml.c index 5b1e32efef..02e0306057 100644 --- a/bindings/ocaml/executionengine/executionengine_ocaml.c +++ b/bindings/ocaml/executionengine/executionengine_ocaml.c @@ -75,6 +75,9 @@ static struct custom_operations generic_value_ops = { custom_hash_default, custom_serialize_default, custom_deserialize_default +#ifdef custom_compare_ext_default + , custom_compare_ext_default +#endif }; static value alloc_generic_value(LLVMGenericValueRef Ref) { diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index a5985d9d2b..c984bd154c 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -1277,6 +1277,9 @@ static struct custom_operations builder_ops = { custom_hash_default, custom_serialize_default, custom_deserialize_default +#ifdef custom_compare_ext_default + , custom_compare_ext_default +#endif }; static value alloc_builder(LLVMBuilderRef B) { diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 94cc5551c8..fcd5dd5566 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -85,13 +85,25 @@ check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H) # library checks if( NOT PURE_WINDOWS ) check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) - check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) - check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) + if (HAVE_LIBPTHREAD) + check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) + check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) + check_library_exists(pthread pthread_mutex_lock "" HAVE_PTHREAD_MUTEX_LOCK) + else() + # this could be Android + check_library_exists(c pthread_create "" PTHREAD_IN_LIBC) + if (PTHREAD_IN_LIBC) + check_library_exists(c pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) + check_library_exists(c pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) + check_library_exists(c pthread_mutex_lock "" HAVE_PTHREAD_MUTEX_LOCK) + endif() + endif() check_library_exists(dl dlopen "" HAVE_LIBDL) endif() # function checks check_symbol_exists(arc4random "stdlib.h" HAVE_ARC4RANDOM) +check_symbol_exists(backtrace "execinfo.h" HAVE_BACKTRACE) check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE) check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE) check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT) @@ -134,9 +146,6 @@ check_symbol_exists(strchr string.h HAVE_STRCHR) check_symbol_exists(strcmp string.h HAVE_STRCMP) check_symbol_exists(strdup string.h HAVE_STRDUP) check_symbol_exists(strrchr string.h HAVE_STRRCHR) -if( NOT PURE_WINDOWS ) - check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK) -endif() check_symbol_exists(sbrk unistd.h HAVE_SBRK) check_symbol_exists(srand48 stdlib.h HAVE_RAND48_SRAND48) if( HAVE_RAND48_SRAND48 ) diff --git a/docs/LangRef.html b/docs/LangRef.html index 810fce5e7a..4daab592e9 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -1207,13 +1207,6 @@ define void @f() optsize { ... 
} may make calls to the function faster, at the cost of extra program startup time if the function is not called during program startup.</dd> - <dt><tt><b>ia_nsdialect</b></tt></dt> - <dd>This attribute indicates the associated inline assembly call is using a - non-standard assembly dialect. The standard dialect is ATT, which is - assumed when this attribute is not present. When present, the dialect - is assumed to be Intel. Currently, ATT and Intel are the only supported - dialects.</dd> - <dt><tt><b>inlinehint</b></tt></dt> <dd>This attribute indicates that the source code contained a hint that inlining this function is desirable (such as the "inline" keyword in C/C++). It @@ -2901,8 +2894,18 @@ call void asm sideeffect "eieio", ""() call void asm alignstack "eieio", ""() </pre> -<p>If both keywords appear the '<tt>sideeffect</tt>' keyword must come - first.</p> +<p>Inline asms also support using non-standard assembly dialects. The assumed + dialect is ATT. When the '<tt>inteldialect</tt>' keyword is present, the + inline asm is using the Intel dialect. Currently, ATT and Intel are the + only supported dialects. An example is:</p> + +<pre class="doc_code"> +call void asm inteldialect "eieio", ""() +</pre> + +<p>If multiple keywords appear, the '<tt>sideeffect</tt>' keyword must come + first, the '<tt>alignstack</tt>' keyword second, and the + '<tt>inteldialect</tt>' keyword last.</p> <!-- <p>TODO: The format of the asm and constraints string still need to be diff --git a/docs/Makefile.sphinx b/docs/Makefile.sphinx index 21f66488b2..81c13de9cd 100644 --- a/docs/Makefile.sphinx +++ b/docs/Makefile.sphinx @@ -46,6 +46,10 @@ clean: html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo + @# FIXME: Remove this `cp` once HTML->Sphinx transition is completed. + @# Kind of a hack, but HTML-formatted docs are on the way out anyway. + @echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html" + @cp -a *.html tutorial $(BUILDDIR)/html @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 1ab2b709f5..75a6fd1ca1 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -499,13 +499,12 @@ Release Notes</a>.</h1> <div> -<p>We have changed the way that the Type Legalizer legalizes vectors. The type - legalizer now attempts to promote integer elements. This enabled the - implementation of vector-select. Additionally, we see a performance boost on - workloads which use vectors of chars and shorts, since they are now promoted - to 32-bit types, which are better supported by the SIMD instruction set. - Floating point types are still widened as before.</p> - +<p>Stack Coloring - We have implemented a new optimization pass + to merge stack objects which are used in disjoint areas of the code. + This optimization reduces the required stack space significantly, in cases + where it is clear to the optimizer that the stack slot is not shared.
+ We use the lifetime markers to tell the codegen that a certain alloca + is used within a region.</p> <p>We have put a significant amount of work into the code generator infrastructure, which allows us to implement more aggressive algorithms and diff --git a/docs/llvm-theme/layout.html b/docs/_themes/llvm-theme/layout.html index 746c2f56c8..746c2f56c8 100644 --- a/docs/llvm-theme/layout.html +++ b/docs/_themes/llvm-theme/layout.html diff --git a/docs/llvm-theme/static/contents.png b/docs/_themes/llvm-theme/static/contents.png Binary files differ index 7fb82154a1..7fb82154a1 100644 --- a/docs/llvm-theme/static/contents.png +++ b/docs/_themes/llvm-theme/static/contents.png diff --git a/docs/llvm-theme/static/llvm-theme.css b/docs/_themes/llvm-theme/static/llvm-theme.css index f684d00ce4..f684d00ce4 100644 --- a/docs/llvm-theme/static/llvm-theme.css +++ b/docs/_themes/llvm-theme/static/llvm-theme.css diff --git a/docs/llvm-theme/static/logo.png b/docs/_themes/llvm-theme/static/logo.png Binary files differ index 18d424c53c..18d424c53c 100644 --- a/docs/llvm-theme/static/logo.png +++ b/docs/_themes/llvm-theme/static/logo.png diff --git a/docs/llvm-theme/static/navigation.png b/docs/_themes/llvm-theme/static/navigation.png Binary files differ index 1081dc1439..1081dc1439 100644 --- a/docs/llvm-theme/static/navigation.png +++ b/docs/_themes/llvm-theme/static/navigation.png diff --git a/docs/llvm-theme/theme.conf b/docs/_themes/llvm-theme/theme.conf index 573fd78aba..573fd78aba 100644 --- a/docs/llvm-theme/theme.conf +++ b/docs/_themes/llvm-theme/theme.conf diff --git a/docs/conf.py b/docs/conf.py index de0585ddb0..a1e9b5f6e2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -98,7 +98,7 @@ html_theme = 'llvm-theme' #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -html_theme_path = ["."] +html_theme_path = ["_themes"] # The name for this set of Sphinx documents. If None, it defaults to # "<project> v<release> documentation". @@ -134,18 +134,7 @@ html_sidebars = {'index': 'indexsidebar.html'} # Additional templates that should be rendered to pages, maps page names to # template names. -# -# We load all the old-school HTML documentation pages into Sphinx here. -basedir = os.path.dirname(__file__) -html_additional_pages = {} -for directory in ('', 'tutorial'): - for file in os.listdir(os.path.join(basedir, directory)): - if not file.endswith('.html'): - continue - - subpath = os.path.join(directory, file) - name,_ = os.path.splitext(subpath) - html_additional_pages[name] = subpath +#html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True @@ -226,6 +215,7 @@ man_pages = [] # Automatically derive the list of man pages from the contents of the command # guide subdirectory. +basedir = os.path.dirname(__file__) man_page_authors = "Maintained by The LLVM Team (http://llvm.org/)." command_guide_subpath = 'CommandGuide' command_guide_path = os.path.join(basedir, command_guide_subpath) @@ -237,9 +227,8 @@ for name in os.listdir(command_guide_path): # Otherwise, automatically extract the description.
file_subpath = os.path.join(command_guide_subpath, name) with open(os.path.join(command_guide_path, name)) as f: - it = iter(f) - title = it.next()[:-1] - header = it.next()[:-1] + title = f.readline().rstrip('\n') + header = f.readline().rstrip('\n') if len(header) != len(title): print >>sys.stderr, ( diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index eceae5c7b6..6587e77080 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -173,10 +173,11 @@ typedef enum { LLVMUWTable = 1 << 30, LLVMNonLazyBind = 1 << 31 - // FIXME: This attribute is currently not included in the C API as - // a temporary measure until the API/ABI impact to the C API is understood - // and the path forward agreed upon. - //LLVMAddressSafety = 1ULL << 32 + /* FIXME: This attribute is currently not included in the C API as + a temporary measure until the API/ABI impact to the C API is understood + and the path forward agreed upon. + LLVMAddressSafety = 1ULL << 32 + */ } LLVMAttribute; typedef enum { @@ -2687,7 +2688,7 @@ namespace llvm { template<typename T> inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { - #if DEBUG + #ifdef DEBUG for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) cast<T>(*I); #endif diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index f30a6e3f08..7f20e3356a 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -1238,8 +1238,8 @@ public: /// countLeadingZeros - This function is an APInt version of the /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number /// of zeros from the most significant bit to the first one bit. - /// @returns BitWidth if the value is zero. - /// @returns the number of zeros from the most significant bit to the first + /// @returns BitWidth if the value is zero, otherwise + /// returns the number of zeros from the most significant bit to the first /// one bits. unsigned countLeadingZeros() const { if (isSingleWord()) { @@ -1252,8 +1252,8 @@ public: /// countLeadingOnes - This function is an APInt version of the /// countLeadingOnes_{32,64} functions in MathExtras.h. It counts the number /// of ones from the most significant bit to the first zero bit. - /// @returns 0 if the high order bit is not set - /// @returns the number of 1 bits from the most significant to the least + /// @returns 0 if the high order bit is not set, otherwise + /// returns the number of 1 bits from the most significant to the least /// @brief Count the number of leading one bits. unsigned countLeadingOnes() const; @@ -1266,8 +1266,8 @@ public: /// countTrailingZeros - This function is an APInt version of the /// countTrailingZeros_{32,64} functions in MathExtras.h. It counts /// the number of zeros from the least significant bit to the first set bit. - /// @returns BitWidth if the value is zero. - /// @returns the number of zeros from the least significant bit to the first + /// @returns BitWidth if the value is zero, otherwise + /// returns the number of zeros from the least significant bit to the first /// one bit. /// @brief Count the number of trailing zero bits. unsigned countTrailingZeros() const; @@ -1275,8 +1275,8 @@ public: /// countTrailingOnes - This function is an APInt version of the /// countTrailingOnes_{32,64} functions in MathExtras.h. It counts /// the number of ones from the least significant bit to the first zero bit. - /// @returns BitWidth if the value is all ones. 
- /// @returns the number of ones from the least significant bit to the first + /// @returns BitWidth if the value is all ones, otherwise + /// returns the number of ones from the least significant bit to the first /// zero bit. /// @brief Count the number of trailing one bits. unsigned countTrailingOnes() const { @@ -1288,8 +1288,8 @@ public: /// countPopulation - This function is an APInt version of the /// countPopulation_{32,64} functions in MathExtras.h. It counts the number /// of 1 bits in the APInt value. - /// @returns 0 if the value is zero. - /// @returns the number of set bits. + /// @returns 0 if the value is zero, otherwise returns the number of set + /// bits. /// @brief Count the number of bits set. unsigned countPopulation() const { if (isSingleWord()) diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index 3e2e5f230a..26ec346b18 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -172,7 +172,7 @@ public: unsigned BitPos = Prev % BITWORD_SIZE; BitWord Copy = Bits[WordPos]; // Mask off previous bits. - Copy &= ~0L << BitPos; + Copy &= ~0UL << BitPos; if (Copy != 0) { if (sizeof(BitWord) == 4) @@ -311,7 +311,7 @@ public: return !(*this == RHS); } - // Intersection, union, disjoint union. + /// Intersection, union, disjoint union. BitVector &operator&=(const BitVector &RHS) { unsigned ThisWords = NumBitWords(size()); unsigned RHSWords = NumBitWords(RHS.size()); @@ -328,7 +328,7 @@ public: return *this; } - // reset - Reset bits that are set in RHS. Same as *this &= ~RHS. + /// reset - Reset bits that are set in RHS. Same as *this &= ~RHS. BitVector &reset(const BitVector &RHS) { unsigned ThisWords = NumBitWords(size()); unsigned RHSWords = NumBitWords(RHS.size()); @@ -338,6 +338,23 @@ public: return *this; } + /// test - Check if (This - RHS) is zero. + /// This is the same as reset(RHS) and any(). + bool test(const BitVector &RHS) const { + unsigned ThisWords = NumBitWords(size()); + unsigned RHSWords = NumBitWords(RHS.size()); + unsigned i; + for (i = 0; i != std::min(ThisWords, RHSWords); ++i) + if ((Bits[i] & ~RHS.Bits[i]) != 0) + return true; + + for (; i != ThisWords ; ++i) + if (Bits[i] != 0) + return true; + + return false; + } + BitVector &operator|=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); @@ -451,8 +468,11 @@ private: // Then set any stray high bits of the last used word. unsigned ExtraBits = Size % BITWORD_SIZE; if (ExtraBits) { - Bits[UsedWords-1] &= ~(~0L << ExtraBits); - Bits[UsedWords-1] |= (0 - (BitWord)t) << ExtraBits; + BitWord ExtraBitMask = ~0UL << ExtraBits; + if (t) + Bits[UsedWords-1] |= ExtraBitMask; + else + Bits[UsedWords-1] &= ~ExtraBitMask; } } diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index 1559a35c39..6f17a647b6 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -31,12 +31,12 @@ struct DenseMapInfo { template<typename T> struct DenseMapInfo<T*> { static inline T* getEmptyKey() { - intptr_t Val = -1; + uintptr_t Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable; return reinterpret_cast<T*>(Val); } static inline T* getTombstoneKey() { - intptr_t Val = -2; + uintptr_t Val = static_cast<uintptr_t>(-2); Val <<= PointerLikeTypeTraits<T*>::NumLowBitsAvailable; return reinterpret_cast<T*>(Val); } @@ -105,7 +105,7 @@ template<> struct DenseMapInfo<int> { // Provide DenseMapInfo for longs. 
template<> struct DenseMapInfo<long> { static inline long getEmptyKey() { - return (1UL << (sizeof(long) * 8 - 1)) - 1L; + return (1UL << (sizeof(long) * 8 - 1)) - 1UL; } static inline long getTombstoneKey() { return getEmptyKey() - 1L; } static unsigned getHashValue(const long& Val) { diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h index 6ab07254a2..23633045ff 100644 --- a/include/llvm/ADT/Hashing.h +++ b/include/llvm/ADT/Hashing.h @@ -409,7 +409,6 @@ bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value, /// combining them, this (as an optimization) directly combines the integers. template <typename InputIteratorT> hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { - typedef typename std::iterator_traits<InputIteratorT>::value_type ValueT; const size_t seed = get_execution_seed(); char buffer[64], *buffer_ptr = buffer; char *const buffer_end = buffer_ptr + array_lengthof(buffer); diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index fcc758b43a..71c379bad5 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -135,12 +135,12 @@ template<typename PointerTy, unsigned IntBits, typename IntType> struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType> > { typedef PointerIntPair<PointerTy, IntBits, IntType> Ty; static Ty getEmptyKey() { - intptr_t Val = -1; + uintptr_t Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable; return Ty(reinterpret_cast<PointerTy>(Val), IntType((1 << IntBits)-1)); } static Ty getTombstoneKey() { - intptr_t Val = -2; + uintptr_t Val = static_cast<uintptr_t>(-2); Val <<= PointerLikeTypeTraits<PointerTy>::NumLowBitsAvailable; return Ty(reinterpret_cast<PointerTy>(Val), IntType(0)); } diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 89774c3f56..791f1082c2 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -158,7 +158,7 @@ public: && "Word Position outside of element"); // Mask off previous bits. - Copy &= ~0L << BitPos; + Copy &= ~0UL << BitPos; if (Copy != 0) { if (sizeof(BitWord) == 4) diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index 5569633348..dc3db4ce1f 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -110,9 +110,9 @@ struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> { /// For sets that may grow to thousands of elements, SparseT should be set to /// uint16_t or uint32_t. /// -/// @param ValueT The type of objects in the set. -/// @param KeyFunctorT A functor that computes an unsigned index from KeyT. -/// @param SparseT An unsigned integer type. See above. +/// @tparam ValueT The type of objects in the set. +/// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT. +/// @tparam SparseT An unsigned integer type. See above. 
/// template<typename ValueT, typename KeyFunctorT = llvm::identity<unsigned>, diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 655d884e7b..36df5acadb 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -125,7 +125,7 @@ void SplitString(StringRef Source, // X*33+c -> X*33^c static inline unsigned HashString(StringRef Str, unsigned Result = 0) { for (unsigned i = 0, e = Str.size(); i != e; ++i) - Result = Result * 33 + Str[i]; + Result = Result * 33 + (unsigned char)Str[i]; return Result; } diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h deleted file mode 100644 index 845af015b0..0000000000 --- a/include/llvm/ADT/Trie.h +++ /dev/null @@ -1,334 +0,0 @@ -//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class defines a generic trie structure. The trie structure -// is immutable after creation, but the payload contained within it is not. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_TRIE_H -#define LLVM_ADT_TRIE_H - -#include "llvm/ADT/GraphTraits.h" -#include "llvm/Support/DOTGraphTraits.h" - -#include <cassert> -#include <vector> - -namespace llvm { - -// FIXME: -// - Labels are usually small, maybe it's better to use SmallString -// - Should we use char* during construction? -// - Should we templatize Empty with traits-like interface? - -template<class Payload> -class Trie { - friend class GraphTraits<Trie<Payload> >; - friend class DOTGraphTraits<Trie<Payload> >; -public: - class Node { - friend class Trie; - - public: - typedef std::vector<Node*> NodeVectorType; - typedef typename NodeVectorType::iterator iterator; - typedef typename NodeVectorType::const_iterator const_iterator; - - private: - enum QueryResult { - Same = -3, - StringIsPrefix = -2, - LabelIsPrefix = -1, - DontMatch = 0, - HaveCommonPart - }; - - struct NodeCmp { - bool operator() (Node* N1, Node* N2) { - return (N1->Label[0] < N2->Label[0]); - } - bool operator() (Node* N, char Id) { - return (N->Label[0] < Id); - } - }; - - std::string Label; - Payload Data; - NodeVectorType Children; - - // Do not implement - Node(const Node&); - Node& operator=(const Node&); - - inline void addEdge(Node* N) { - if (Children.empty()) - Children.push_back(N); - else { - iterator I = std::lower_bound(Children.begin(), Children.end(), - N, NodeCmp()); - // FIXME: no dups are allowed - Children.insert(I, N); - } - } - - inline void setEdge(Node* N) { - char Id = N->Label[0]; - iterator I = std::lower_bound(Children.begin(), Children.end(), - Id, NodeCmp()); - assert(I != Children.end() && "Node does not exists!"); - *I = N; - } - - QueryResult query(const std::string& s) const { - unsigned i, l; - unsigned l1 = s.length(); - unsigned l2 = Label.length(); - - // Find the length of common part - l = std::min(l1, l2); - i = 0; - while ((i < l) && (s[i] == Label[i])) - ++i; - - if (i == l) { // One is prefix of another, find who is who - if (l1 == l2) - return Same; - else if (i == l1) - return StringIsPrefix; - else - return LabelIsPrefix; - } else // s and Label have common (possible empty) part, return its length - return (QueryResult)i; - } - - public: - inline explicit Node(const Payload& data, const 
std::string& label = ""): - Label(label), Data(data) { } - - inline const Payload& data() const { return Data; } - inline void setData(const Payload& data) { Data = data; } - - inline const std::string& label() const { return Label; } - -#if 0 - inline void dump() { - llvm::cerr << "Node: " << this << "\n" - << "Label: " << Label << "\n" - << "Children:\n"; - - for (iterator I = Children.begin(), E = Children.end(); I != E; ++I) - llvm::cerr << (*I)->Label << "\n"; - } -#endif - - inline Node* getEdge(char Id) { - Node* fNode = NULL; - iterator I = std::lower_bound(Children.begin(), Children.end(), - Id, NodeCmp()); - if (I != Children.end() && (*I)->Label[0] == Id) - fNode = *I; - - return fNode; - } - - inline iterator begin() { return Children.begin(); } - inline const_iterator begin() const { return Children.begin(); } - inline iterator end () { return Children.end(); } - inline const_iterator end () const { return Children.end(); } - - inline size_t size () const { return Children.size(); } - inline bool empty() const { return Children.empty(); } - inline const Node* &front() const { return Children.front(); } - inline Node* &front() { return Children.front(); } - inline const Node* &back() const { return Children.back(); } - inline Node* &back() { return Children.back(); } - - }; - -private: - std::vector<Node*> Nodes; - Payload Empty; - - inline Node* addNode(const Payload& data, const std::string label = "") { - Node* N = new Node(data, label); - Nodes.push_back(N); - return N; - } - - inline Node* splitEdge(Node* N, char Id, size_t index) { - Node* eNode = N->getEdge(Id); - assert(eNode && "Node doesn't exist"); - - const std::string &l = eNode->Label; - assert(index > 0 && index < l.length() && "Trying to split too far!"); - std::string l1 = l.substr(0, index); - std::string l2 = l.substr(index); - - Node* nNode = addNode(Empty, l1); - N->setEdge(nNode); - - eNode->Label = l2; - nNode->addEdge(eNode); - - return nNode; - } - - // Do not implement - Trie(const Trie&); - Trie& operator=(const Trie&); - -public: - inline explicit Trie(const Payload& empty):Empty(empty) { - addNode(Empty); - } - inline ~Trie() { - for (unsigned i = 0, e = Nodes.size(); i != e; ++i) - delete Nodes[i]; - } - - inline Node* getRoot() const { return Nodes[0]; } - - bool addString(const std::string& s, const Payload& data); - const Payload& lookup(const std::string& s) const; - -}; - -// Define this out-of-line to dissuade the C++ compiler from inlining it. -template<class Payload> -bool Trie<Payload>::addString(const std::string& s, const Payload& data) { - Node* cNode = getRoot(); - Node* tNode = NULL; - std::string s1(s); - - while (tNode == NULL) { - char Id = s1[0]; - if (Node* nNode = cNode->getEdge(Id)) { - typename Node::QueryResult r = nNode->query(s1); - - switch (r) { - case Node::Same: - case Node::StringIsPrefix: - // Currently we don't allow to have two strings in the trie one - // being a prefix of another. This should be fixed. 
- assert(0 && "FIXME!"); - return false; - case Node::DontMatch: - llvm_unreachable("Impossible!"); - case Node::LabelIsPrefix: - s1 = s1.substr(nNode->label().length()); - cNode = nNode; - break; - default: - nNode = splitEdge(cNode, Id, r); - tNode = addNode(data, s1.substr(r)); - nNode->addEdge(tNode); - } - } else { - tNode = addNode(data, s1); - cNode->addEdge(tNode); - } - } - - return true; -} - -template<class Payload> -const Payload& Trie<Payload>::lookup(const std::string& s) const { - Node* cNode = getRoot(); - Node* tNode = NULL; - std::string s1(s); - - while (tNode == NULL) { - char Id = s1[0]; - if (Node* nNode = cNode->getEdge(Id)) { - typename Node::QueryResult r = nNode->query(s1); - - switch (r) { - case Node::Same: - tNode = nNode; - break; - case Node::StringIsPrefix: - return Empty; - case Node::DontMatch: - llvm_unreachable("Impossible!"); - case Node::LabelIsPrefix: - s1 = s1.substr(nNode->label().length()); - cNode = nNode; - break; - default: - return Empty; - } - } else - return Empty; - } - - return tNode->data(); -} - -template<class Payload> -struct GraphTraits<Trie<Payload> > { - typedef Trie<Payload> TrieType; - typedef typename TrieType::Node NodeType; - typedef typename NodeType::iterator ChildIteratorType; - - static inline NodeType *getEntryNode(const TrieType& T) { - return T.getRoot(); - } - - static inline ChildIteratorType child_begin(NodeType *N) { - return N->begin(); - } - static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } - - typedef typename std::vector<NodeType*>::const_iterator nodes_iterator; - - static inline nodes_iterator nodes_begin(const TrieType& G) { - return G.Nodes.begin(); - } - static inline nodes_iterator nodes_end(const TrieType& G) { - return G.Nodes.end(); - } - -}; - -template<class Payload> -struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits { - typedef typename Trie<Payload>::Node NodeType; - typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter; - - static std::string getGraphName(const Trie<Payload>& T) { - return "Trie"; - } - - static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) { - if (T.getRoot() == Node) - return "<Root>"; - else - return Node->label(); - } - - static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) { - NodeType* N = *I; - return N->label().substr(0, 1); - } - - static std::string getNodeAttributes(const NodeType* Node, - const Trie<Payload>& T) { - if (Node->data() != T.Empty) - return "color=blue"; - - return ""; - } - -}; - -} // end of llvm namespace - -#endif // LLVM_ADT_TRIE_H diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 7f7061ab01..ab1f0da51e 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -74,7 +74,8 @@ public: PC, SCEI, BGP, - BGQ + BGQ, + Freescale }; enum OSType { UnknownOS, @@ -109,7 +110,7 @@ public: GNUEABIHF, EABI, MachO, - ANDROIDEABI + Android }; private: diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 674868a026..f5872201ad 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -46,6 +46,7 @@ class LoadInst; class StoreInst; class VAArgInst; class TargetData; +class TargetLibraryInfo; class Pass; class AnalysisUsage; class MemTransferInst; @@ -55,6 +56,7 @@ class DominatorTree; class AliasAnalysis { protected: const TargetData *TD; + const TargetLibraryInfo *TLI; private: AliasAnalysis *AA; // Previous Alias Analysis to chain to. 
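As an aside on the AliasAnalysis.h hunk above: this series threads a TargetLibraryInfo pointer through AliasAnalysis alongside the existing TargetData, and the hunks that follow add a getTargetLibraryInfo() accessor plus an isNoAlias(const Value*, const Value*) convenience overload. A minimal usage sketch under those assumptions; the helper name and the surrounding pass boilerplate are hypothetical:

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Hypothetical helper: returns true when AA can prove the two pointers
    // never alias. With no sizes supplied, the new convenience overload
    // expands to AA.isNoAlias(Location(A), Location(B)).
    static bool provablyDistinct(AliasAnalysis &AA, const Value *A,
                                 const Value *B) {
      return AA.isNoAlias(A, B);
    }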
@@ -73,7 +75,7 @@ protected: public: static char ID; // Class identification, replacement for typeinfo - AliasAnalysis() : TD(0), AA(0) {} + AliasAnalysis() : TD(0), TLI(0), AA(0) {} virtual ~AliasAnalysis(); // We want to be subclassed /// UnknownSize - This is a special value which can be used with the @@ -86,6 +88,11 @@ public: /// const TargetData *getTargetData() const { return TD; } + /// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo + /// object, or null if no TargetLibraryInfo object is available. + /// + const TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } + /// getTypeStoreSize - Return the TargetData store size for the given type, /// if known, or a conservative value otherwise. /// @@ -187,6 +194,11 @@ public: return isNoAlias(Location(V1, V1Size), Location(V2, V2Size)); } + /// isNoAlias - A convenience wrapper. + bool isNoAlias(const Value *V1, const Value *V2) { + return isNoAlias(Location(V1), Location(V2)); + } + /// isMustAlias - A convenience wrapper. bool isMustAlias(const Location &LocA, const Location &LocB) { return alias(LocA, LocB) == MustAlias; diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 006daa0829..c0567daa3a 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -28,11 +28,14 @@ class raw_ostream; /// /// This is a function analysis pass which provides information on the relative /// probabilities of each "edge" in the function's CFG where such an edge is -/// defined by a pair of basic blocks. The probability for a given block and -/// a successor block are always relative to the probabilities of the other -/// successor blocks. Another way of looking at it is that the probabilities -/// for a given block B and each of its successors should sum to exactly -/// one (100%). +/// defined by a pair (PredBlock and an index in the successors). The +/// probability of an edge from one block is always relative to the +/// probabilities of other edges from the block. The probabilities of all edges +/// from a block sum to exactly one (100%). +/// We use a pair (PredBlock and an index in the successors) to uniquely +/// identify an edge, since we can have multiple edges from Src to Dst. +/// As an example, we can have a switch which jumps to Dst with value 0 and +/// value 10. class BranchProbabilityInfo : public FunctionPass { public: static char ID; @@ -52,6 +55,12 @@ public: /// leaving the 'Src' block. The returned probability is never zero, and can /// only be one if the source block has only one successor. BranchProbability getEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors) const; + + /// \brief Get the probability of going from Src to Dst. + /// + /// It returns the sum of all probabilities for edges from Src to Dst. + BranchProbability getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const; /// \brief Test if an edge is hot relative to other out-edges of the Src. @@ -74,25 +83,34 @@ public: raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const; - /// \brief Get the raw edge weight calculated for the block pair. + /// \brief Get the raw edge weight calculated for the edge. /// /// This returns the raw edge weight. It is guaranteed to fall between 1 and /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation.
/// This interface should be used very carefully, and primarily by routines that /// are updating the analysis by later calling setEdgeWeight. + uint32_t getEdgeWeight(const BasicBlock *Src, + unsigned IndexInSuccessors) const; + + /// \brief Get the raw edge weight calculated for the block pair. + /// + /// This returns the sum of all raw edge weights from Src to Dst. + /// It is guaranteed to fall between 1 and UINT32_MAX. uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; - /// \brief Set the raw edge weight for the block pair. + /// \brief Set the raw edge weight for a given edge. /// - /// This allows a pass to explicitly set the edge weight for a block. It can + /// This allows a pass to explicitly set the edge weight for an edge. It can /// be used when updating the CFG to update and preserve the branch /// probability information. Read the implementation of how these edge /// weights are calculated carefully before using! - void setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, + void setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, uint32_t Weight); private: - typedef std::pair<const BasicBlock *, const BasicBlock *> Edge; + // Since we allow duplicate edges from one basic block to another, we use + // a pair (PredBlock and an index in the successors) to specify an edge. + typedef std::pair<const BasicBlock *, unsigned> Edge; // Default weight value. Used when we don't have information about the edge. // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index c07fbf7aa8..3bb96f96bf 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -145,7 +145,6 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { // Loop over the predecessors of the header node... BlockT *Header = getHeader(); - typedef GraphTraits<BlockT*> BlockTraits; typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; for (typename InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(Header), diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index e16f3894f7..c3ae603b70 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -28,6 +28,7 @@ namespace llvm { class CallInst; class PointerType; class TargetData; +class TargetLibraryInfo; class Type; class Value; @@ -35,27 +36,33 @@ class Value; /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). -bool isAllocationFn(const Value *V, bool LookThroughBitCast = false); +bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); /// \brief Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). -bool isNoAliasFn(const Value *V, bool LookThroughBitCast = false); +bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); /// \brief Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc).
-bool isMallocLikeFn(const Value *V, bool LookThroughBitCast = false); +bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); /// \brief Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). -bool isCallocLikeFn(const Value *V, bool LookThroughBitCast = false); +bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). -bool isAllocLikeFn(const Value *V, bool LookThroughBitCast = false); +bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); /// \brief Tests if a value is a call or invoke to a library function that /// reallocates memory (such as realloc). -bool isReallocLikeFn(const Value *V, bool LookThroughBitCast = false); +bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); //===----------------------------------------------------------------------===// @@ -65,29 +72,31 @@ bool isReallocLikeFn(const Value *V, bool LookThroughBitCast = false); /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *extractMallocCall(const Value *I); -static inline CallInst *extractMallocCall(Value *I) { - return const_cast<CallInst*>(extractMallocCall((const Value*)I)); +const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); +static inline CallInst *extractMallocCall(Value *I, + const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI)); } /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -const CallInst *isArrayMalloc(const Value *I, const TargetData *TD); +const CallInst *isArrayMalloc(const Value *I, const TargetData *TD, + const TargetLibraryInfo *TLI); /// getMallocType - Returns the PointerType resulting from the malloc call. /// The PointerType depends on the number of bitcast uses of the malloc call: /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -PointerType *getMallocType(const CallInst *CI); +PointerType *getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI); /// getMallocAllocatedType - Returns the Type allocated by malloc call. /// The Type depends on the number of bitcast uses of the malloc call: /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -Type *getMallocAllocatedType(const CallInst *CI); +Type *getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI); /// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, @@ -95,6 +104,7 @@ Type *getMallocAllocatedType(const CallInst *CI); /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. 
Value *getMallocArraySize(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt = false); @@ -104,9 +114,10 @@ Value *getMallocArraySize(CallInst *CI, const TargetData *TD, /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. -const CallInst *extractCallocCall(const Value *I); -static inline CallInst *extractCallocCall(Value *I) { - return const_cast<CallInst*>(extractCallocCall((const Value*)I)); +const CallInst *extractCallocCall(const Value *I, const TargetLibraryInfo *TLI); +static inline CallInst *extractCallocCall(Value *I, + const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(extractCallocCall((const Value*)I, TLI)); } @@ -115,10 +126,10 @@ static inline CallInst *extractCallocCall(Value *I) { // /// isFreeCall - Returns non-null if the value is a call to the builtin free() -const CallInst *isFreeCall(const Value *I); +const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *isFreeCall(Value *I) { - return const_cast<CallInst*>(isFreeCall((const Value*)I)); +static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { + return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI)); } @@ -131,7 +142,7 @@ static inline CallInst *isFreeCall(Value *I) { /// If RoundToAlign is true, then Size is rounded up to the alignment of allocas, /// byval arguments, and global variables. bool getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, - bool RoundToAlign = false); + const TargetLibraryInfo *TLI, bool RoundToAlign = false); @@ -143,6 +154,7 @@ class ObjectSizeOffsetVisitor : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> { const TargetData *TD; + const TargetLibraryInfo *TLI; bool RoundToAlign; unsigned IntTyBits; APInt Zero; @@ -155,8 +167,8 @@ class ObjectSizeOffsetVisitor } public: - ObjectSizeOffsetVisitor(const TargetData *TD, LLVMContext &Context, - bool RoundToAlign = false); + ObjectSizeOffsetVisitor(const TargetData *TD, const TargetLibraryInfo *TLI, + LLVMContext &Context, bool RoundToAlign = false); SizeOffsetType compute(Value *V); @@ -202,6 +214,7 @@ class ObjectSizeOffsetEvaluator typedef SmallPtrSet<const Value*, 8> PtrSetTy; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext &Context; BuilderTy Builder; IntegerType *IntTy; @@ -215,7 +228,8 @@ class ObjectSizeOffsetEvaluator SizeOffsetEvalType compute_(Value *V); public: - ObjectSizeOffsetEvaluator(const TargetData *TD, LLVMContext &Context); + ObjectSizeOffsetEvaluator(const TargetData *TD, const TargetLibraryInfo *TLI, + LLVMContext &Context); SizeOffsetEvalType compute(Value *V); bool knownSize(SizeOffsetEvalType SizeOffset) { diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index a22bd12dec..c52f846b5c 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -103,6 +103,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // + // createProfileMetadataLoaderPass - This pass loads information from a + // profile dump file and sets branch weight metadata. + // + ModulePass *createProfileMetadataLoaderPass(); + extern char &ProfileMetadataLoaderPassID; + + //===--------------------------------------------------------------------===// + // // createNoProfileInfoPass - This pass implements the default "no profile".
// ImmutablePass *createNoProfileInfoPass(); diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h new file mode 100644 index 0000000000..bec9fac770 --- /dev/null +++ b/include/llvm/Analysis/ProfileDataLoader.h @@ -0,0 +1,142 @@ +//===- ProfileDataLoader.h - Load & convert profile info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load profiling data from a dump file. +// The ProfileDataT<FType, BType> class is used to store the mapping of this +// data to control flow edges. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H +#define LLVM_ANALYSIS_PROFILEDATALOADER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include <string> + +namespace llvm { + +class ModulePass; +class Function; +class BasicBlock; + +// Helper for dumping edges to dbgs(). +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, + const BasicBlock *> E); + +/// \brief The ProfileDataT<FType, BType> class is used to store the mapping of +/// profiling data to control flow edges. +/// +/// An edge is defined by its source and sink basic blocks. +template<class FType, class BType> +class ProfileDataT { +public: + // The profiling information defines an Edge by its source and sink basic + // blocks. + typedef std::pair<const BType*, const BType*> Edge; + +private: + typedef DenseMap<Edge, unsigned> EdgeWeights; + + /// \brief Count the number of times a transition between two blocks is + /// executed. + /// + /// As a special case, we also hold an edge from the null BasicBlock to the + /// entry block to indicate how many times the function was entered. + DenseMap<const FType*, EdgeWeights> EdgeInformation; + +public: + /// getFunction() - Returns the Function for an Edge. + static const FType *getFunction(Edge e) { + // e.first may be NULL + assert(((!e.first) || (e.first->getParent() == e.second->getParent())) + && "A ProfileData::Edge can not be between two functions"); + assert(e.second && "A ProfileData::Edge must have a real sink"); + return e.second->getParent(); + } + + /// getEdge() - Creates an Edge between two BasicBlocks. + static Edge getEdge(const BType *Src, const BType *Dest) { + return Edge(Src, Dest); + } + + /// getEdgeWeight - Return the number of times that a given edge was + /// executed. + unsigned getEdgeWeight(Edge e) const { + const FType *f = getFunction(e); + assert((EdgeInformation.find(f) != EdgeInformation.end()) + && "No profiling information for function"); + EdgeWeights weights = EdgeInformation.find(f)->second; + + assert((weights.find(e) != weights.end()) + && "No profiling information for edge"); + return weights.find(e)->second; + } + + /// addEdgeWeight - Add 'weight' to the already stored execution count for + /// this edge. + void addEdgeWeight(Edge e, unsigned weight) { + EdgeInformation[getFunction(e)][e] += weight; + } +}; + +typedef ProfileDataT<Function, BasicBlock> ProfileData; +//typedef ProfileDataT<MachineFunction, MachineBasicBlock> MachineProfileData; + +/// The ProfileDataLoader class is used to load raw profiling data from the +/// dump file. 
+class ProfileDataLoader { +private: + /// The name of the file where the raw profiling data is stored. + const std::string &Filename; + + /// A vector of the command line arguments used when the target program was + /// run to generate profiling data. One entry per program run. + SmallVector<std::string, 1> CommandLines; + + /// The raw values for how many times each edge was traversed, values from + /// multiple program runs are accumulated. + SmallVector<unsigned, 32> EdgeCounts; + +public: + /// ProfileDataLoader ctor - Read the specified profiling data file, exiting + /// the program if the file is invalid or broken. + ProfileDataLoader(const char *ToolName, const std::string &Filename); + + /// A special value used to represent the weight of an edge which has not + /// been counted yet. + static const unsigned Uncounted; + + /// The maximum value that can be stored in a profiling counter. + static const unsigned MaxCount; + + /// getNumExecutions - Return the number of times the target program was run + /// to generate this profiling data. + unsigned getNumExecutions() const { return CommandLines.size(); } + + /// getExecution - Return the command line parameters used to generate the + /// i'th set of profiling data. + const std::string &getExecution(unsigned i) const { return CommandLines[i]; } + + const std::string &getFileName() const { return Filename; } + + /// getRawEdgeCounts - Return the raw profiling data, this is just a list of + /// numbers with no mappings to edges. + ArrayRef<unsigned> getRawEdgeCounts() const { return EdgeCounts; } +}; + +/// createProfileMetadataLoaderPass - This function returns a Pass that loads +/// the profiling information for the module from the specified filename. +ModulePass *createProfileMetadataLoaderPass(const std::string &Filename); + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/ProfileDataTypes.h b/include/llvm/Analysis/ProfileDataTypes.h new file mode 100644 index 0000000000..1be15e025d --- /dev/null +++ b/include/llvm/Analysis/ProfileDataTypes.h @@ -0,0 +1,39 @@ +/*===-- ProfileDataTypes.h - Profiling info shared constants --------------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. +|* +|*===----------------------------------------------------------------------===*| +|* +|* This file defines constants shared by the various different profiling +|* runtime libraries and the LLVM C++ profile metadata loader. It must be a +|* C header because, at present, the profiling runtimes are written in C. +|* +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_ANALYSIS_PROFILEDATATYPES_H +#define LLVM_ANALYSIS_PROFILEDATATYPES_H + +/* Included by libprofile. */ +#if defined(__cplusplus) +extern "C" { +#endif + +/* TODO: Strip out unused entries once ProfileInfo etc has been removed. 
*/ +enum ProfilingType { + ArgumentInfo = 1, /* The command line argument block */ + FunctionInfo = 2, /* Function profiling information */ + BlockInfo = 3, /* Block profiling information */ + EdgeInfo = 4, /* Edge profiling information */ + PathInfo = 5, /* Path profiling information */ + BBTraceInfo = 6, /* Basic block trace information */ + OptEdgeInfo = 7 /* Edge profiling information, optimal version */ +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* LLVM_ANALYSIS_PROFILEDATATYPES_H */ diff --git a/include/llvm/Analysis/ProfileInfoTypes.h b/include/llvm/Analysis/ProfileInfoTypes.h index 6b4ac85082..45aab5b70d 100644 --- a/include/llvm/Analysis/ProfileInfoTypes.h +++ b/include/llvm/Analysis/ProfileInfoTypes.h @@ -27,15 +27,7 @@ enum ProfilingStorageType { ProfilingHash = 2 }; -enum ProfilingType { - ArgumentInfo = 1, /* The command line argument block */ - FunctionInfo = 2, /* Function profiling information */ - BlockInfo = 3, /* Block profiling information */ - EdgeInfo = 4, /* Edge profiling information */ - PathInfo = 5, /* Path profiling information */ - BBTraceInfo = 6, /* Basic block trace information */ - OptEdgeInfo = 7 /* Edge profiling information, optimal version */ -}; +#include "llvm/Analysis/ProfileDataTypes.h" /* * The header for tables that map path numbers to path counters. diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 188d11c283..e62040e2ee 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -473,27 +473,6 @@ public: const_iterator end() const { return children.end(); } //@} - /// @name BasicBlock Node Iterators - /// - /// These iterators iterate over all BasicBlock RegionNodes that are - /// contained in this Region. The iterator also iterates over BasicBlock - /// RegionNodes that are elements of a subregion of this Region. It is - /// therefore called a flat iterator. - //@{ - typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false, - GraphTraits<FlatIt<RegionNode*> > > block_node_iterator; - - typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>, - false, GraphTraits<FlatIt<const RegionNode*> > > - const_block_node_iterator; - - block_node_iterator block_node_begin(); - block_node_iterator block_node_end(); - - const_block_node_iterator block_node_begin() const; - const_block_node_iterator block_node_end() const; - //@} - /// @name BasicBlock Iterators /// /// These iterators iterate over all BasicBlocks that are contained in this diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 223aa00639..0228d8691d 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -134,9 +134,6 @@ DECLARE_LLVM_ATTRIBUTE(NonLazyBind,1U<<31) ///< Function is called early and/or /// often, so lazy binding isn't /// worthwhile. DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is on. -DECLARE_LLVM_ATTRIBUTE(IANSDialect,1ULL<<33) ///< Inline asm non-standard dialect. - /// When not set, ATT dialect assumed. - /// When set implies the Intel dialect. 
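For context on the Attributes.h removal above: the per-call IANSDialect attribute is superseded by an assembly-dialect flag carried on the InlineAsm value itself, spelled 'inteldialect' in IR (see the LangRef.html hunk earlier) and encoded by the new CST_CODE_INLINEASM bitcode record (LLVMBitCodes.h hunk below). A hedged sketch of constructing such an asm from C++; it assumes InlineAsm::get gained a trailing InlineAsm::AsmDialect parameter in this series (the AsmPrinter.h hunk below shows the InlineAsm::AD_ATT default), and FTy stands for a caller-provided void() FunctionType:

    #include "llvm/InlineAsm.h"
    using namespace llvm;

    // Assumed available from surrounding code: FunctionType *FTy (void()).
    // The last argument selects the dialect; omitting it keeps the default
    // AT&T dialect (InlineAsm::AD_ATT).
    InlineAsm *IA = InlineAsm::get(FTy, /*AsmString=*/"eieio",
                                   /*Constraints=*/"",
                                   /*hasSideEffects=*/true,
                                   /*isAlignStack=*/false,
                                   InlineAsm::AD_Intel);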
#undef DECLARE_LLVM_ATTRIBUTE @@ -162,8 +159,7 @@ const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i | ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i | StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i | Naked_i | InlineHint_i | StackAlignment_i | - UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i | - IANSDialect_i}; + UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i}; /// @brief Parameter attributes that do not apply to vararg call arguments. const AttrConst VarArgsIncompatible = {StructRet_i}; diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h index 3c75e5882d..c1c3ec044b 100644 --- a/include/llvm/Bitcode/Archive.h +++ b/include/llvm/Bitcode/Archive.h @@ -415,8 +415,8 @@ class Archive { /// name will be truncated at 15 characters. If \p Compress is specified, /// all archive members will be compressed before being written. If /// \p PrintSymTab is true, the symbol table will be printed to std::cout. - /// @returns true if an error occurred, \p error set to error message - /// @returns false if the writing succeeded. + /// @returns true if an error occurred, \p error set to error message; + /// returns false if the writing succeeded. /// @brief Write (possibly modified) archive contents to disk bool writeToDisk( bool CreateSymbolTable=false, ///< Create Symbol table @@ -480,8 +480,8 @@ class Archive { /// Writes one ArchiveMember to an ofstream. If an error occurs, returns /// false, otherwise true. If an error occurs and error is non-null then /// it will be set to an error message. - /// @returns false Writing member succeeded - /// @returns true Writing member failed, \p error set to error message + /// @returns false if writing member succeeded, + /// returns true if writing member failed, \p error set to error message. 
bool writeMember( const ArchiveMember& member, ///< The member to be written std::ofstream& ARFile, ///< The file to write member onto diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index a8c34cb829..c1dc190304 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -161,11 +161,14 @@ namespace bitc { CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] - CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr] + CST_CODE_INLINEASM_OLD = 18, // INLINEASM: [sideeffect|alignstack, + // asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands] CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] - CST_CODE_DATA = 22 // DATA: [n x elements] + CST_CODE_DATA = 22, // DATA: [n x elements] + CST_CODE_INLINEASM = 23 // INLINEASM: [sideeffect|alignstack| + // asmdialect,asmstr,conststr] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index ad214ccb07..b8d435ee13 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_ASMPRINTER_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InlineAsm.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -483,7 +484,8 @@ namespace llvm { mutable unsigned SetCounter; /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. - void EmitInlineAsm(StringRef Str, const MDNode *LocMDNode = 0) const; + void EmitInlineAsm(StringRef Str, const MDNode *LocMDNode = 0, + InlineAsm::AsmDialect AsmDialect = InlineAsm::AD_ATT) const; /// EmitInlineAsm - This method formats and emits the specified machine /// instruction that is an inline asm. diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 2d8181cf6a..95aafb324d 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -637,6 +637,10 @@ namespace ISD { ATOMIC_LOAD_UMIN, ATOMIC_LOAD_UMAX, + /// This corresponds to the llvm.lifetime.* intrinsics. The first operand + /// is the chain and the second operand is the alloca pointer. + LIFETIME_START, LIFETIME_END, + // @LOCALMOD-BEGIN // NACL_* - Native Client instrinsics. // These correspond to functions in: diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index a3ce47c02a..5aeb1a8c31 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -29,6 +29,7 @@ #include <climits> namespace llvm { + class CoalescerPair; class LiveIntervals; class MachineInstr; class MachineRegisterInfo; @@ -366,6 +367,14 @@ namespace llvm { return overlapsFrom(other, other.begin()); } + /// overlaps - Return true if the two intervals have overlapping segments + /// that are not coalescable according to CP. + /// + /// Overlapping segments where one interval is defined by a coalescable + /// copy are allowed. + bool overlaps(const LiveInterval &Other, const CoalescerPair &CP, + const SlotIndexes&) const; + /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). 
bool overlaps(SlotIndex Start, SlotIndex End) const; diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index da521dbc53..bf7469093a 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -252,7 +252,7 @@ namespace llvm { /// addKillFlags - Add kill flags to any instruction that kills a virtual /// register. - void addKillFlags(); + void addKillFlags(const VirtRegMap*); /// handleMove - call this method to notify LiveIntervals that /// instruction 'mi' has been moved within a basic block. This will update diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 8b958e437e..3c07cebfcc 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -28,6 +28,7 @@ class MachineFunction; class MachineBasicBlock; class TargetFrameLowering; class BitVector; +class Value; /// The CalleeSavedInfo class tracks the information needed to locate where a /// callee saved register is in the current frame. @@ -103,14 +104,18 @@ class MachineFrameInfo { // protector. bool MayNeedSP; + /// Alloca - If this stack object originated from an Alloca instruction, + /// this value saves the original IR allocation. Can be NULL. + const Value *Alloca; + // PreAllocated - If true, the object was mapped into the local frame // block and doesn't need additional handling for allocation beyond that. bool PreAllocated; StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, bool NSP) + bool isSS, bool NSP, const Value *Val) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {} + isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {} }; /// Objects - The list of stack objects allocated... @@ -362,6 +367,14 @@ public: ensureMaxAlignment(Align); } + /// getObjectAllocation - Return the underlying Alloca of the specified + /// stack object if it exists. Returns 0 if none exists. + const Value* getObjectAllocation(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].Alloca; + } + /// NeedsStackProtector - Returns true if the object may need stack /// protectors. bool MayNeedStackProtector(int ObjectIdx) const { @@ -482,9 +495,10 @@ public: /// a nonnegative identifier to represent it.
/// int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - bool MayNeedSP = false) { + bool MayNeedSP = false, const Value *Alloca = 0) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP)); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, + Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -516,7 +530,7 @@ public: /// int CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Objects.push_back(StackObject(0, Alignment, 0, false, false, true)); + Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 062c7508c4..0eb9d0e509 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -138,15 +138,19 @@ public: MachineModuleInfo &getMMI() const { return MMI; } GCModuleInfo *getGMI() const { return GMI; } MCContext &getContext() const { return Ctx; } - + /// getFunction - Return the LLVM function that this machine code represents /// const Function *getFunction() const { return Fn; } + /// getName - Return the name of the corresponding LLVM function. + /// + StringRef getName() const; + /// getFunctionNumber - Return a unique ID for the current function. /// unsigned getFunctionNumber() const { return FunctionNumber; } - + /// getTarget - Return the target machine this machine code is compiled with /// const TargetMachine &getTarget() const { return Target; } diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 27756abf3f..4e1533a8e7 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -25,6 +25,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/InlineAsm.h" #include "llvm/Support/DebugLoc.h" #include <vector> @@ -610,6 +611,7 @@ public: bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } bool isStackAligningInlineAsm() const; + InlineAsm::AsmDialect getInlineAsmDialect() const; bool isInsertSubreg() const { return getOpcode() == TargetOpcode::INSERT_SUBREG; } @@ -782,16 +784,43 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; + /// tieOperands - Add a tie between the register operands at DefIdx and + /// UseIdx. The tie will cause the register allocator to ensure that the two + /// operands are assigned the same physical register. + /// + /// Tied operands are managed automatically for explicit operands in the + /// MCInstrDesc. This method is for exceptional cases like inline asm. + void tieOperands(unsigned DefIdx, unsigned UseIdx); + + /// findTiedOperandIdx - Given the index of a tied register operand, find the + /// operand it is tied to. Defs are tied to uses and vice versa. Returns the + /// index of the tied operand which must exist. + unsigned findTiedOperandIdx(unsigned OpIdx) const; + /// isRegTiedToUseOperand - Given the index of a register def operand, /// check if the register def is tied to a source operand, due to either /// two-address elimination or inline assembly constraints. 
Returns the /// first tied use operand index by reference if UseOpIdx is not null. - bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx = 0) const; + bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx = 0) const { + const MachineOperand &MO = getOperand(DefOpIdx); + if (!MO.isReg() || !MO.isDef() || !MO.isTied()) + return false; + if (UseOpIdx) + *UseOpIdx = findTiedOperandIdx(DefOpIdx); + return true; + } /// isRegTiedToDefOperand - Return true if the use operand of the specified /// index is tied to an def operand. It also returns the def operand index by /// reference if DefOpIdx is not null. - bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx = 0) const; + bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx = 0) const { + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse() || !MO.isTied()) + return false; + if (DefOpIdx) + *DefOpIdx = findTiedOperandIdx(UseOpIdx); + return true; + } /// clearKillInfo - Clears kill flags on all operands. /// @@ -852,11 +881,11 @@ public: bool isSafeToReMat(const TargetInstrInfo *TII, AliasAnalysis *AA, unsigned DstReg) const; - /// hasVolatileMemoryRef - Return true if this instruction may have a - /// volatile memory reference, or if the information describing the - /// memory reference is not available. Return false if it is known to - /// have no volatile memory references. - bool hasVolatileMemoryRef() const; + /// hasOrderedMemoryRef - Return true if this instruction may have an ordered + /// or volatile memory reference, or if the information describing the memory + /// reference is not available. Return false if it is known to have no + /// ordered or volatile memory references. + bool hasOrderedMemoryRef() const; /// isInvariantLoad - Return true if this instruction is loading from a /// location whose value is invariant across the function. For example, @@ -935,6 +964,15 @@ private: /// return null. MachineRegisterInfo *getRegInfo(); + /// untieRegOperand - Break any tie involving OpIdx. + void untieRegOperand(unsigned OpIdx) { + MachineOperand &MO = getOperand(OpIdx); + if (MO.isReg() && MO.isTied()) { + getOperand(findTiedOperandIdx(OpIdx)).TiedTo = 0; + MO.TiedTo = 0; + } + } + /// addImplicitDefUseOperands - Add all implicit def and use operands to /// this instruction. void addImplicitDefUseOperands(); diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 1ac9080b75..ddb127120f 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -151,6 +151,15 @@ public: bool isNonTemporal() const { return Flags & MONonTemporal; } bool isInvariant() const { return Flags & MOInvariant; } + /// isUnordered - Returns true if this memory operation doesn't have any + /// ordering constraints other than normal aliasing. Volatile and atomic + /// memory operations can't be reordered. + /// + /// Currently, we don't model the difference between volatile and atomic + /// operations. They should retain their ordering relative to all memory + /// operations. + bool isUnordered() const { return !isVolatile(); } + /// refineAlignment - Update this MachineMemOperand to reflect the alignment /// of MMO, if it has a greater alignment. This must only be used when the /// new alignment applies to all users of this MachineMemOperand. 
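A minimal sketch, not from the patch itself, of how the new MachineMemOperand::isUnordered() predicate is meant to compose with an instruction's memoperand list, in the same spirit as the renamed MachineInstr::hasOrderedMemoryRef(); the helper name below is hypothetical:

static bool mayReorderMemOp(const MachineInstr &MI) {
  if (!MI.mayLoad() && !MI.mayStore())
    return true;                      // Touches no memory at all.
  if (MI.memoperands_empty())
    return false;                     // No description: assume ordered.
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                  E = MI.memoperands_end(); I != E; ++I)
    if (!(*I)->isUnordered())         // Volatile or atomic: keep order.
      return false;
  return true;
}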
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 37d42b3583..0b9d67f37a 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -14,7 +14,6 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H -#include "llvm/ADT/Hashing.h" #include "llvm/Support/DataTypes.h" #include <cassert> @@ -30,6 +29,7 @@ class MachineRegisterInfo; class MDNode; class TargetMachine; class TargetRegisterInfo; +class hash_code; class raw_ostream; class MCSymbol; @@ -60,12 +60,20 @@ private: /// union. unsigned char OpKind; // MachineOperandType - /// SubReg - Subregister number, only valid for MO_Register. A value of 0 - /// indicates the MO_Register has no subReg. - unsigned char SubReg; + // This union is discriminated by OpKind. + union { + /// SubReg - Subregister number, only valid for MO_Register. A value of 0 + /// indicates the MO_Register has no subReg. + unsigned char SubReg; + + /// TargetFlags - This is a set of target-specific operand flags. + unsigned char TargetFlags; + }; - /// TargetFlags - This is a set of target-specific operand flags. - unsigned char TargetFlags; + /// TiedTo - Non-zero when this register operand is tied to another register + /// operand. The encoding of this field is described in the block comment + /// before MachineInstr::tieOperands(). + unsigned char TiedTo : 4; /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register /// operands. @@ -176,9 +184,17 @@ public: /// MachineOperandType getType() const { return (MachineOperandType)OpKind; } - unsigned char getTargetFlags() const { return TargetFlags; } - void setTargetFlags(unsigned char F) { TargetFlags = F; } - void addTargetFlag(unsigned char F) { TargetFlags |= F; } + unsigned char getTargetFlags() const { + return isReg() ? 0 : TargetFlags; + } + void setTargetFlags(unsigned char F) { + assert(!isReg() && "Register operands can't have target flags"); + TargetFlags = F; + } + void addTargetFlag(unsigned char F) { + assert(!isReg() && "Register operands can't have target flags"); + TargetFlags |= F; + } /// getParent - Return the instruction that this operand belongs to. @@ -288,6 +304,11 @@ public: return IsEarlyClobber; } + bool isTied() const { + assert(isReg() && "Wrong MachineOperand accessor"); + return TiedTo; + } + bool isDebug() const { assert(isReg() && "Wrong MachineOperand accessor"); return IsDebug; @@ -421,7 +442,7 @@ public: int64_t getOffset() const { assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() || isBlockAddress()) && "Wrong MachineOperand accessor"); - return (int64_t(Contents.OffsetedInfo.OffsetHi) << 32) | + return int64_t(uint64_t(Contents.OffsetedInfo.OffsetHi) << 32) | SmallContents.OffsetLo; } @@ -548,6 +569,7 @@ public: Op.IsUndef = isUndef; Op.IsInternalRead = isInternalRead; Op.IsEarlyClobber = isEarlyClobber; + Op.TiedTo = 0; Op.IsDebug = isDebug; Op.SmallContents.RegNo = Reg; Op.Contents.Reg.Prev = 0; diff --git a/include/llvm/CodeGen/PBQP/HeuristicBase.h b/include/llvm/CodeGen/PBQP/HeuristicBase.h index 3fee18cc42..0c1fcb7eaf 100644 --- a/include/llvm/CodeGen/PBQP/HeuristicBase.h +++ b/include/llvm/CodeGen/PBQP/HeuristicBase.h @@ -113,7 +113,7 @@ namespace PBQP { } /// \brief Add the given node to the list of nodes to be optimally reduced. - /// @return nItr Node iterator to be added. + /// @param nItr Node iterator to be added. 
/// /// You probably don't want to override this, except perhaps to record /// statistics before calling this implementation. HeuristicBase relies on /// the node being pushed onto the back of the optimal nodes list. @@ -193,8 +193,9 @@ namespace PBQP { /// reduce list. /// @return True if a reduction takes place, false if the heuristic reduce /// list is empty. - void heuristicReduce() { + bool heuristicReduce() { llvm_unreachable("Must be implemented in derived class."); + return false; } /// \brief Prepare a change in the costs on the given edge. diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 07b3b45873..7bd576494e 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -404,6 +404,10 @@ namespace llvm { /// inserting cmov instructions. extern char &EarlyIfConverterID; + /// StackColoring - This pass performs stack coloring and merging. + /// It merges disjoint allocas to reduce the stack size. + extern char &StackColoringID; + /// IfConverter - This pass performs machine code if conversion. extern char &IfConverterID; diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 85ab47beb6..2567a65733 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -85,6 +85,8 @@ namespace llvm { /// the value of the Latency field of the predecessor, however advanced /// models may provide additional information about specific edges. unsigned Latency; + /// Record MinLatency separately from "expected" Latency. + unsigned MinLatency; public: /// SDep - Construct a null SDep. This is only for use by container @@ -96,7 +98,7 @@ namespace llvm { SDep(SUnit *S, Kind kind, unsigned latency = 1, unsigned Reg = 0, bool isNormalMemory = false, bool isMustAlias = false, bool isArtificial = false) - : Dep(S, kind), Contents(), Latency(latency) { + : Dep(S, kind), Contents(), Latency(latency), MinLatency(latency) { switch (kind) { case Anti: case Output: @@ -135,7 +137,8 @@ namespace llvm { } bool operator==(const SDep &Other) const { - return overlaps(Other) && Latency == Other.Latency; + return overlaps(Other) + && Latency == Other.Latency && MinLatency == Other.MinLatency; } bool operator!=(const SDep &Other) const { @@ -155,6 +158,18 @@ namespace llvm { Latency = Lat; } + /// getMinLatency - Return the minimum latency for this edge. Minimum + /// latency is used for scheduling groups, while normal (expected) latency + /// is for instruction cost and critical path. + unsigned getMinLatency() const { + return MinLatency; + } + + /// setMinLatency - Set the minimum latency for this edge. + void setMinLatency(unsigned Lat) { + MinLatency = Lat; + } + /// getSUnit - Return the SUnit to which this edge points. SUnit *getSUnit() const { return Dep.getPointer(); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 1bde94215a..8b52b5a9c7 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -108,6 +108,15 @@ namespace llvm { } }; + /// Record a physical register access. + /// For non-data-dependent uses, OpIdx == -1. + struct PhysRegSUOper { + SUnit *SU; + int OpIdx; + + PhysRegSUOper(SUnit *su, int op): SU(su), OpIdx(op) {} + }; + /// Combine a SparseSet with a 1x1 vector to track physical registers. /// The SparseSet allows iterating over the (few) live registers for quickly /// comparing against a regmask or clearing the set. @@ -116,7 +125,7 @@ namespace llvm { /// cleared between scheduling regions without freeing unused entries.
class Reg2SUnitsMap { SparseSet<unsigned> PhysRegSet; - std::vector<std::vector<SUnit*> > SUnits; + std::vector<std::vector<PhysRegSUOper> > SUnits; public: typedef SparseSet<unsigned>::const_iterator const_iterator; @@ -140,7 +149,7 @@ namespace llvm { /// If this register is mapped, return its existing SUnits vector. /// Otherwise map the register and return an empty SUnits vector. - std::vector<SUnit *> &operator[](unsigned Reg) { + std::vector<PhysRegSUOper> &operator[](unsigned Reg) { bool New = PhysRegSet.insert(Reg).second; assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); (void)New; @@ -288,16 +297,6 @@ namespace llvm { /// virtual void computeLatency(SUnit *SU); - /// computeOperandLatency - Return dependence edge latency using - /// operand use/def information - /// - /// FindMin may be set to get the minimum vs. expected latency. Minimum - /// latency is used for scheduling groups, while expected latency is for - /// instruction cost and critical path. - virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin = false) const; - /// schedule - Order nodes according to selected style, filling /// in the Sequence member. /// @@ -319,7 +318,7 @@ namespace llvm { protected: void initSUnits(); - void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); + void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); void addPhysRegDeps(SUnit *SU, unsigned OperIdx); void addVRegDefDeps(SUnit *SU, unsigned OperIdx); void addVRegUseDeps(SUnit *SU, unsigned OperIdx); diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index db361ee9b1..3bea2ded68 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1011,11 +1011,6 @@ class AtomicSDNode : public MemSDNode { SubclassData |= SynchScope << 12; assert(getOrdering() == Ordering && "Ordering encoding error!"); assert(getSynchScope() == SynchScope && "Synch-scope encoding error!"); - - assert((readMem() || getOrdering() <= Monotonic) && - "Acquire/Release MachineMemOperand must be a load!"); - assert((writeMem() || getOrdering() <= Monotonic) && - "Acquire/Release MachineMemOperand must be a store!"); } public: @@ -1750,10 +1745,10 @@ public: class SDNodeIterator : public std::iterator<std::forward_iterator_tag, SDNode, ptrdiff_t> { - SDNode *Node; + const SDNode *Node; unsigned Operand; - SDNodeIterator(SDNode *N, unsigned Op) : Node(N), Operand(Op) {} + SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {} public: bool operator==(const SDNodeIterator& x) const { return Operand == x.Operand; @@ -1784,8 +1779,8 @@ public: return Operand - Other.Operand; } - static SDNodeIterator begin(SDNode *N) { return SDNodeIterator(N, 0); } - static SDNodeIterator end (SDNode *N) { + static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); } + static SDNodeIterator end (const SDNode *N) { return SDNodeIterator(N, N->getNumOperands()); } diff --git a/include/llvm/Config/AsmParsers.def.in b/include/llvm/Config/AsmParsers.def.in index 041af83754..d63675351c 100644 --- a/include/llvm/Config/AsmParsers.def.in +++ b/include/llvm/Config/AsmParsers.def.in @@ -1,24 +1,24 @@ -//===- llvm/Config/AsmParsers.def - LLVM Assembly Parsers -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file enumerates all of the assembly-language parsers -// supported by this build of LLVM. Clients of this file should define -// the LLVM_ASM_PARSER macro to be a function-like macro with a -// single parameter (the name of the target whose assembly can be -// generated); including this file will then enumerate all of the -// targets with assembly parsers. -// -// The set of targets supported by LLVM is generated at configuration -// time, at which point this header is generated. Do not modify this -// header directly. -// -//===----------------------------------------------------------------------===// +/*===- llvm/Config/AsmParsers.def - LLVM Assembly Parsers -------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file enumerates all of the assembly-language parsers *| +|* supported by this build of LLVM. Clients of this file should define *| +|* the LLVM_ASM_PARSER macro to be a function-like macro with a *| +|* single parameter (the name of the target whose assembly can be *| +|* generated); including this file will then enumerate all of the *| +|* targets with assembly parsers. *| +|* *| +|* The set of targets supported by LLVM is generated at configuration *| +|* time, at which point this header is generated. Do not modify this *| +|* header directly. *| +|* *| +\*===----------------------------------------------------------------------===*/ #ifndef LLVM_ASM_PARSER # error Please define the macro LLVM_ASM_PARSER(TargetName) diff --git a/include/llvm/Config/AsmPrinters.def.in b/include/llvm/Config/AsmPrinters.def.in index 9729bd75eb..f0152a4aa9 100644 --- a/include/llvm/Config/AsmPrinters.def.in +++ b/include/llvm/Config/AsmPrinters.def.in @@ -1,24 +1,24 @@ -//===- llvm/Config/AsmPrinters.def - LLVM Assembly Printers -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file enumerates all of the assembly-language printers -// supported by this build of LLVM. Clients of this file should define -// the LLVM_ASM_PRINTER macro to be a function-like macro with a -// single parameter (the name of the target whose assembly can be -// generated); including this file will then enumerate all of the -// targets with assembly printers. -// -// The set of targets supported by LLVM is generated at configuration -// time, at which point this header is generated. Do not modify this -// header directly. -// -//===----------------------------------------------------------------------===// +/*===- llvm/Config/AsmPrinters.def - LLVM Assembly Printers -----*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file enumerates all of the assembly-language printers *| +|* supported by this build of LLVM. 
Clients of this file should define *| +|* the LLVM_ASM_PRINTER macro to be a function-like macro with a *| +|* single parameter (the name of the target whose assembly can be *| +|* generated); including this file will then enumerate all of the *| +|* targets with assembly printers. *| +|* *| +|* The set of targets supported by LLVM is generated at configuration *| +|* time, at which point this header is generated. Do not modify this *| +|* header directly. *| +|* *| +\*===----------------------------------------------------------------------===*/ #ifndef LLVM_ASM_PRINTER # error Please define the macro LLVM_ASM_PRINTER(TargetName) diff --git a/include/llvm/Config/Disassemblers.def.in b/include/llvm/Config/Disassemblers.def.in index 1e6281de99..d3a9bbdeae 100644 --- a/include/llvm/Config/Disassemblers.def.in +++ b/include/llvm/Config/Disassemblers.def.in @@ -1,24 +1,24 @@ -//===- llvm/Config/Disassemblers.def - LLVM Assembly Parsers ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file enumerates all of the assembly-language parsers -// supported by this build of LLVM. Clients of this file should define -// the LLVM_DISASSEMBLER macro to be a function-like macro with a -// single parameter (the name of the target whose assembly can be -// generated); including this file will then enumerate all of the -// targets with assembly parsers. -// -// The set of targets supported by LLVM is generated at configuration -// time, at which point this header is generated. Do not modify this -// header directly. -// -//===----------------------------------------------------------------------===// +/*===- llvm/Config/Disassemblers.def - LLVM Assembly Parsers ----*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file enumerates all of the assembly-language parsers *| +|* supported by this build of LLVM. Clients of this file should define *| +|* the LLVM_DISASSEMBLER macro to be a function-like macro with a *| +|* single parameter (the name of the target whose assembly can be *| +|* generated); including this file will then enumerate all of the *| +|* targets with assembly parsers. *| +|* *| +|* The set of targets supported by LLVM is generated at configuration *| +|* time, at which point this header is generated. Do not modify this *| +|* header directly. *| +|* *| +\*===----------------------------------------------------------------------===*/ #ifndef LLVM_DISASSEMBLER # error Please define the macro LLVM_DISASSEMBLER(TargetName) diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index b912251239..eb20b6470b 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -51,7 +51,7 @@ #cmakedefine HAVE_ASSERT_H ${HAVE_ASSERT_H} /* Define to 1 if you have the `backtrace' function. */ -#undef HAVE_BACKTRACE +#cmakedefine HAVE_BACKTRACE ${HAVE_BACKTRACE} /* Define to 1 if you have the `bcopy' function. 
*/ #undef HAVE_BCOPY diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h index 2ed48a944e..dd4ea96ae2 100644 --- a/include/llvm/DIBuilder.h +++ b/include/llvm/DIBuilder.h @@ -179,8 +179,10 @@ namespace llvm { /// @param Ty Parent type. /// @param PropertyName Name of the Objective C property associated with /// this ivar. - /// @param GetterName Name of the Objective C property getter selector. - /// @param SetterName Name of the Objective C property setter selector. + /// @param PropertyGetterName Name of the Objective C property getter + /// selector. + /// @param PropertySetterName Name of the Objective C property setter + /// selector. /// @param PropertyAttributes Objective C property attributes. DIType createObjCIVar(StringRef Name, DIFile File, unsigned LineNo, uint64_t SizeInBits, @@ -201,7 +203,7 @@ namespace llvm { /// @param OffsetInBits Member offset. /// @param Flags Flags to encode member attribute, e.g. private /// @param Ty Parent type. - /// @param Property Property associated with this ivar. + /// @param PropertyNode Property associated with this ivar. DIType createObjCIVar(StringRef Name, DIFile File, unsigned LineNo, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, @@ -228,7 +230,7 @@ namespace llvm { /// @param Scope Scope in which this class is defined. /// @param Name class name. /// @param File File where this member is defined. - /// @param LineNo Line number. + /// @param LineNumber Line number. /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param OffsetInBits Member offset. @@ -250,7 +252,7 @@ namespace llvm { /// @param Scope Scope in which this struct is defined. /// @param Name Struct name. /// @param File File where this member is defined. - /// @param LineNo Line number. + /// @param LineNumber Line number. /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param Flags Flags to encode member attribute, e.g. private @@ -265,7 +267,7 @@ namespace llvm { /// @param Scope Scope in which this union is defined. /// @param Name Union name. /// @param File File where this member is defined. - /// @param LineNo Line number. + /// @param LineNumber Line number. /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param Flags Flags to encode member attribute, e.g. private @@ -325,7 +327,7 @@ namespace llvm { /// @param Scope Scope in which this enumeration is defined. /// @param Name Union name. /// @param File File where this member is defined. - /// @param LineNo Line number. + /// @param LineNumber Line number. /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param Elements Enumeration elements. @@ -337,9 +339,9 @@ namespace llvm { unsigned Flags); /// createSubroutineType - Create subroutine type. - /// @param File File in which this subroutine is defined. - /// @param ParamterTypes An array of subroutine parameter types. This - /// includes return type at 0th index. + /// @param File File in which this subroutine is defined. + /// @param ParameterTypes An array of subroutine parameter types. This + /// includes return type at 0th index. DIType createSubroutineType(DIFile File, DIArray ParameterTypes); /// createArtificialType - Create a new DIType with "artificial" flag set. @@ -383,9 +385,9 @@ namespace llvm { /// createStaticVariable - Create a new descriptor for the specified /// variable. - /// @param Conext Variable scope. + /// @param Context Variable scope. /// @param Name Name of the variable. 
- /// @param LinakgeName Mangled name of the variable. + /// @param LinkageName Mangled name of the variable. /// @param File File where this variable is defined. /// @param LineNo Line number. /// @param Ty Variable Type. @@ -426,7 +428,7 @@ namespace llvm { /// DW_TAG_arg_variable. /// @param Scope Variable scope. /// @param Name Variable name. - /// @param File File where this variable is defined. + /// @param F File where this variable is defined. /// @param LineNo Line number. /// @param Ty Variable Type /// @param Addr An array of complex address operations. diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index cfdeb46889..8d6054aa45 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -15,6 +15,7 @@ #ifndef LLVM_DEBUGINFO_DICONTEXT_H #define LLVM_DEBUGINFO_DICONTEXT_H +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" @@ -54,6 +55,23 @@ public: } }; +/// DIInliningInfo - a format-neutral container for inlined code description. +class DIInliningInfo { + SmallVector<DILineInfo, 4> Frames; + public: + DIInliningInfo() {} + DILineInfo getFrame(unsigned Index) const { + assert(Index < Frames.size()); + return Frames[Index]; + } + uint32_t getNumberOfFrames() const { + return Frames.size(); + } + void addFrame(const DILineInfo &Frame) { + Frames.push_back(Frame); + } +}; + /// DILineInfoSpecifier - controls which fields of DILineInfo container /// should be filled with data. class DILineInfoSpecifier { @@ -81,12 +99,15 @@ public: StringRef abbrevSection, StringRef aRangeSection = StringRef(), StringRef lineSection = StringRef(), - StringRef stringSection = StringRef()); + StringRef stringSection = StringRef(), + StringRef rangeSection = StringRef()); virtual void dump(raw_ostream &OS) = 0; - virtual DILineInfo getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier = DILineInfoSpecifier()) = 0; + virtual DILineInfo getLineInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; + virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; }; } diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index a5c9272d3c..9e5ad2feb0 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -73,6 +73,10 @@ public: /// and resolve relocatons based on where they put it). void *getSymbolAddress(StringRef Name); + /// Get the address of the target copy of the symbol. This is the address + /// used for relocation. + uint64_t getSymbolLoadAddress(StringRef Name); + /// Resolve the relocations for all symbols we currently know about. 
void resolveRelocations(); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 3c0ab0f33c..ccf906be08 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -141,6 +141,7 @@ void initializeLiveRegMatrixPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); +void initializeProfileMetadataLoaderPassPass(PassRegistry&); void initializePathProfileLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopDeletionPass(PassRegistry&); @@ -231,6 +232,7 @@ void initializeSinkingPass(PassRegistry&); void initializeSlotIndexesPass(PassRegistry&); void initializeSpillPlacementPass(PassRegistry&); void initializeStackProtectorPass(PassRegistry&); +void initializeStackColoringPass(PassRegistry&); void initializeStackSlotColoringPass(PassRegistry&); void initializeStripDeadDebugInfoPass(PassRegistry&); void initializeStripDeadPrototypesPassPass(PassRegistry&); diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h index 37aa18bfff..a0aecfd8e4 100644 --- a/include/llvm/InlineAsm.h +++ b/include/llvm/InlineAsm.h @@ -33,6 +33,13 @@ template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator; class InlineAsm : public Value { +public: + enum AsmDialect { + AD_ATT, + AD_Intel + }; + +private: friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>; friend class ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType, InlineAsm, false>; @@ -43,10 +50,11 @@ class InlineAsm : public Value { std::string AsmString, Constraints; bool HasSideEffects; bool IsAlignStack; - + AsmDialect Dialect; + InlineAsm(PointerType *Ty, const std::string &AsmString, const std::string &Constraints, bool hasSideEffects, - bool isAlignStack); + bool isAlignStack, AsmDialect asmDialect); virtual ~InlineAsm(); /// When the ConstantUniqueMap merges two types and makes two InlineAsms @@ -58,11 +66,13 @@ public: /// static InlineAsm *get(FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, - bool isAlignStack = false); + bool isAlignStack = false, + AsmDialect asmDialect = AD_ATT); bool hasSideEffects() const { return HasSideEffects; } bool isAlignStack() const { return IsAlignStack; } - + AsmDialect getDialect() const { return Dialect; } + /// getType - InlineAsm's are always pointers. /// PointerType *getType() const { @@ -193,17 +203,18 @@ public: Op_InputChain = 0, Op_AsmString = 1, Op_MDNode = 2, - Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack + Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack, AsmDialect. Op_FirstOperand = 4, // Fixed operands on an INLINEASM MachineInstr. MIOp_AsmString = 0, - MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack + MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack, AsmDialect. MIOp_FirstOperand = 2, // Interpretation of the MIOp_ExtraInfo bit field. Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + Extra_AsmDialect = 4, // Inline asm operands map to multiple SDNode / MachineInstr operands. // The first operand is an immediate describing the asm operand, the low diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h index 2529f24fe9..6291a6d988 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/InstrTypes.h @@ -581,8 +581,8 @@ public: /// Determine how a pair of casts can be eliminated, if they can be at all. 
/// This is a helper function for both CastInst and ConstantExpr. - /// @returns 0 if the CastInst pair can't be eliminated - /// @returns Instruction::CastOps value for a cast that can replace + /// @returns 0 if the CastInst pair can't be eliminated, otherwise + /// returns Instruction::CastOps value for a cast that can replace /// the pair, casting SrcTy to DstTy. /// @brief Determine if a cast pair is eliminable static unsigned isEliminableCastPair( diff --git a/include/llvm/IntrinsicsMips.td b/include/llvm/IntrinsicsMips.td index 4375ac2a7a..e40e162a15 100644 --- a/include/llvm/IntrinsicsMips.td +++ b/include/llvm/IntrinsicsMips.td @@ -14,11 +14,15 @@ //===----------------------------------------------------------------------===// // MIPS DSP data types def mips_v2q15_ty: LLVMType<v2i16>; +def mips_v4q7_ty: LLVMType<v4i8>; def mips_q31_ty: LLVMType<i32>; let TargetPrefix = "mips" in { // All intrinsics start with "llvm.mips.". //===----------------------------------------------------------------------===// +// MIPS DSP Rev 1 + +//===----------------------------------------------------------------------===// // Addition/subtraction def int_mips_addu_qb : GCCBuiltin<"__builtin_mips_addu_qb">, @@ -261,4 +265,125 @@ def int_mips_lhx: GCCBuiltin<"__builtin_mips_lhx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_mips_lwx: GCCBuiltin<"__builtin_mips_lwx">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>; + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 2 + +def int_mips_absq_s_qb: GCCBuiltin<"__builtin_mips_absq_s_qb">, + Intrinsic<[mips_v4q7_ty], [mips_v4q7_ty], []>; + +def int_mips_addqh_ph: GCCBuiltin<"__builtin_mips_addqh_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_addqh_r_ph: GCCBuiltin<"__builtin_mips_addqh_r_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], + [IntrNoMem, Commutative]>; +def int_mips_addqh_w: GCCBuiltin<"__builtin_mips_addqh_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], + [IntrNoMem, Commutative]>; +def int_mips_addqh_r_w: GCCBuiltin<"__builtin_mips_addqh_r_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], + [IntrNoMem, Commutative]>; + +def int_mips_addu_ph: GCCBuiltin<"__builtin_mips_addu_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; +def int_mips_addu_s_ph: GCCBuiltin<"__builtin_mips_addu_s_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; + +def int_mips_adduh_qb: GCCBuiltin<"__builtin_mips_adduh_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; +def int_mips_adduh_r_qb: GCCBuiltin<"__builtin_mips_adduh_r_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], + [IntrNoMem, Commutative]>; + +def int_mips_append: GCCBuiltin<"__builtin_mips_append">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; +def int_mips_cmpgdu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgdu_lt_qb">, + Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; +def int_mips_cmpgdu_le_qb: GCCBuiltin<"__builtin_mips_cmpgdu_le_qb">, + 
Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>; + +def int_mips_dpa_w_ph: GCCBuiltin<"__builtin_mips_dpa_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], + [IntrNoMem]>; +def int_mips_dps_w_ph: GCCBuiltin<"__builtin_mips_dps_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], + [IntrNoMem]>; + +def int_mips_dpaqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_s_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; +def int_mips_dpaqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpaqx_sa_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; +def int_mips_dpax_w_ph: GCCBuiltin<"__builtin_mips_dpax_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], + [IntrNoMem]>; +def int_mips_dpsx_w_ph: GCCBuiltin<"__builtin_mips_dpsx_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], + [IntrNoMem]>; +def int_mips_dpsqx_s_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_s_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; +def int_mips_dpsqx_sa_w_ph: GCCBuiltin<"__builtin_mips_dpsqx_sa_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>; + +def int_mips_mul_ph: GCCBuiltin<"__builtin_mips_mul_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; +def int_mips_mul_s_ph: GCCBuiltin<"__builtin_mips_mul_s_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], [Commutative]>; + +def int_mips_mulq_rs_w: GCCBuiltin<"__builtin_mips_mulq_rs_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>; +def int_mips_mulq_s_ph: GCCBuiltin<"__builtin_mips_mulq_s_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>; +def int_mips_mulq_s_w: GCCBuiltin<"__builtin_mips_mulq_s_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>; +def int_mips_mulsa_w_ph: GCCBuiltin<"__builtin_mips_mulsa_w_ph">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty], + [IntrNoMem]>; + +def int_mips_precr_qb_ph: GCCBuiltin<"__builtin_mips_precr_qb_ph">, + Intrinsic<[llvm_v4i8_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; +def int_mips_precr_sra_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_ph_w">, + Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_mips_precr_sra_r_ph_w: GCCBuiltin<"__builtin_mips_precr_sra_r_ph_w">, + Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_mips_prepend: GCCBuiltin<"__builtin_mips_prepend">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_mips_shra_qb: GCCBuiltin<"__builtin_mips_shra_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shra_r_qb: GCCBuiltin<"__builtin_mips_shra_r_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_mips_shrl_ph: GCCBuiltin<"__builtin_mips_shrl_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_mips_subqh_ph: GCCBuiltin<"__builtin_mips_subqh_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_subqh_r_ph: GCCBuiltin<"__builtin_mips_subqh_r_ph">, + Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>; +def int_mips_subqh_w: GCCBuiltin<"__builtin_mips_subqh_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; 
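As an illustration of the DSP Rev 2 additions, here is a hypothetical C-level use of one of the builtins mapped above, assuming a MIPS DSP Rev 2 target and the usual GCC vector_size typedef for a pair of Q15 values (this example is not part of the patch):

typedef short v2q15 __attribute__((vector_size(4)));

v2q15 halving_add_round(v2q15 a, v2q15 b) {
  // Lowers to int_mips_addqh_r_ph: rounding Q15 halving add.
  return __builtin_mips_addqh_r_ph(a, b);
}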
+def int_mips_subqh_r_w: GCCBuiltin<"__builtin_mips_subqh_r_w">, + Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>; + +def int_mips_subu_ph: GCCBuiltin<"__builtin_mips_subu_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; +def int_mips_subu_s_ph: GCCBuiltin<"__builtin_mips_subu_s_ph">, + Intrinsic<[llvm_v2i16_ty], [llvm_v2i16_ty, llvm_v2i16_ty], []>; + +def int_mips_subuh_qb: GCCBuiltin<"__builtin_mips_subuh_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; +def int_mips_subuh_r_qb: GCCBuiltin<"__builtin_mips_subuh_r_qb">, + Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>; } diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index e8039f2358..5ff085633e 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -219,7 +219,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f32_ty], []>; + llvm_v4f32_ty], [IntrReadWriteArgMem]>; } // Cacheability support ops @@ -502,13 +502,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">, Intrinsic<[], [llvm_ptr_ty, - llvm_v2f64_ty], []>; + llvm_v2f64_ty], [IntrReadWriteArgMem]>; def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">, Intrinsic<[], [llvm_ptr_ty, - llvm_v16i8_ty], []>; + llvm_v16i8_ty], [IntrReadWriteArgMem]>; def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4i32_ty], []>; + llvm_v4i32_ty], [IntrReadWriteArgMem]>; } // Misc. @@ -1270,19 +1270,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_vbroadcast_ss : GCCBuiltin<"__builtin_ia32_vbroadcastss">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcast_sd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastsd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcast_ss_256 : GCCBuiltin<"__builtin_ia32_vbroadcastss256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcastf128_pd_256 : GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx_vbroadcastf128_ps_256 : GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; } // SIMD load ops @@ -1294,41 +1294,45 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // SIMD store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">, - Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], [IntrReadWriteArgMem]>; } // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, - Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], [IntrReadMem]>; + Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], + [IntrReadArgMem]>; def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], + [IntrReadArgMem]>; def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], + [IntrReadArgMem]>; def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadMem]>; + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], + [IntrReadArgMem]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, Intrinsic<[], [llvm_ptr_ty, - llvm_v2f64_ty, llvm_v2f64_ty], []>; + llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f32_ty, llvm_v4f32_ty], []>; + llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_pd_256 : GCCBuiltin<"__builtin_ia32_maskstorepd256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f64_ty, llvm_v4f64_ty], []>; + llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps_256 : GCCBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v8f32_ty, llvm_v8f32_ty], []>; + llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; } //===----------------------------------------------------------------------===// @@ -1632,7 +1636,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx2_vbroadcasti128 : GCCBuiltin<"__builtin_ia32_vbroadcastsi256">, - Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; def int_x86_avx2_pbroadcastb_128 : GCCBuiltin<"__builtin_ia32_pbroadcastb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; @@ -1685,27 +1689,35 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">, - Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], + [IntrReadArgMem]>; def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">, - Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], + [IntrReadArgMem]>; def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem]>; + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], + [IntrReadArgMem]>; def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], + [IntrReadArgMem]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrReadWriteArgMem]>; def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">, - Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrReadWriteArgMem]>; def int_x86_avx2_maskstore_d_256 : GCCBuiltin<"__builtin_ia32_maskstored256">, - Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrReadWriteArgMem]>; def int_x86_avx2_maskstore_q_256 : GCCBuiltin<"__builtin_ia32_maskstoreq256">, - Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>; + Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrReadWriteArgMem]>; } // Variable bit shift ops diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 697c94c094..fe4c92a295 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -107,6 +107,7 @@ namespace { (void) llvm::createProfileVerifierPass(); (void) llvm::createPathProfileVerifierPass(); (void) llvm::createProfileLoaderPass(); + (void) llvm::createProfileMetadataLoaderPass(); (void) llvm::createPathProfileLoaderPass(); (void) llvm::createPromoteMemoryToRegisterPass(); (void) llvm::createDemoteRegisterToMemoryPass(); diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 56a489c9f2..048d469fd5 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -31,8 +31,8 @@ class raw_ostream; /// MCAsmBackend - Generic interface to target specific assembler backends. class MCAsmBackend { - MCAsmBackend(const MCAsmBackend &); // DO NOT IMPLEMENT - void operator=(const MCAsmBackend &); // DO NOT IMPLEMENT + MCAsmBackend(const MCAsmBackend &) LLVM_DELETED_FUNCTION; + void operator=(const MCAsmBackend &) LLVM_DELETED_FUNCTION; protected: // Can only create subclasses. MCAsmBackend(); @@ -134,6 +134,13 @@ public: /// @} + /// getMinimumNopSize - Returns the minimum size of a nop in bytes on this + /// target. The assembler will use this to emit excess padding in situations + /// where the padding required for simple alignment would be less than the + /// minimum nop size. 
+ /// + virtual unsigned getMinimumNopSize() const { return 1; } + /// writeNopData - Write an (optimal) nop sequence of Count bytes to the given /// output. If the target cannot generate such a sequence, it should return an /// error. diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index a0bf996daf..a38b9a85c2 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -40,8 +40,8 @@ class MCAsmBackend; class MCFragment : public ilist_node<MCFragment> { friend class MCAsmLayout; - MCFragment(const MCFragment&); // DO NOT IMPLEMENT - void operator=(const MCFragment&); // DO NOT IMPLEMENT + MCFragment(const MCFragment&) LLVM_DELETED_FUNCTION; + void operator=(const MCFragment&) LLVM_DELETED_FUNCTION; public: enum FragmentType { @@ -233,7 +233,7 @@ public: typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator; public: - MCInstFragment(MCInst _Inst, MCSectionData *SD = 0) + MCInstFragment(const MCInst &_Inst, MCSectionData *SD = 0) : MCFragment(FT_Inst, SD), Inst(_Inst) { } @@ -248,7 +248,7 @@ public: MCInst &getInst() { return Inst; } const MCInst &getInst() const { return Inst; } - void setInst(MCInst Value) { Inst = Value; } + void setInst(const MCInst& Value) { Inst = Value; } /// @} /// @name Fixup Access diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h index 934ef69ce3..4f7d103060 100644 --- a/include/llvm/MC/MCCodeEmitter.h +++ b/include/llvm/MC/MCCodeEmitter.h @@ -10,6 +10,8 @@ #ifndef LLVM_MC_MCCODEEMITTER_H #define LLVM_MC_MCCODEEMITTER_H +#include "llvm/Support/Compiler.h" + namespace llvm { class MCFixup; class MCInst; @@ -19,8 +21,8 @@ template<typename T> class SmallVectorImpl; /// MCCodeEmitter - Generic instruction encoding interface. class MCCodeEmitter { private: - MCCodeEmitter(const MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const MCCodeEmitter &); // DO NOT IMPLEMENT + MCCodeEmitter(const MCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const MCCodeEmitter &) LLVM_DELETED_FUNCTION; protected: // Can only create subclasses. MCCodeEmitter(); diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 59545d31a6..23652f00d0 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -40,8 +40,8 @@ namespace llvm { /// of the sections that it creates. /// class MCContext { - MCContext(const MCContext&); // DO NOT IMPLEMENT - MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT + MCContext(const MCContext&) LLVM_DELETED_FUNCTION; + MCContext &operator=(const MCContext&) LLVM_DELETED_FUNCTION; public: typedef StringMap<MCSymbol*, BumpPtrAllocator&> SymbolTable; private: diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index fdb7ab23c0..9e09ddf411 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -19,6 +19,7 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/Compiler.h" #include <vector> namespace llvm { @@ -48,8 +49,8 @@ namespace llvm { MCDwarfFile(StringRef name, unsigned dirIndex) : Name(name), DirIndex(dirIndex) {} - MCDwarfFile(const MCDwarfFile&); // DO NOT IMPLEMENT - void operator=(const MCDwarfFile&); // DO NOT IMPLEMENT + MCDwarfFile(const MCDwarfFile&) LLVM_DELETED_FUNCTION; + void operator=(const MCDwarfFile&) LLVM_DELETED_FUNCTION; public: /// getName - Get the base name of this MCDwarfFile. 
StringRef getName() const { return Name; } diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index aa62eb2b16..f36db3c05a 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -41,8 +41,8 @@ public: private: ExprKind Kind; - MCExpr(const MCExpr&); // DO NOT IMPLEMENT - void operator=(const MCExpr&); // DO NOT IMPLEMENT + MCExpr(const MCExpr&) LLVM_DELETED_FUNCTION; + void operator=(const MCExpr&) LLVM_DELETED_FUNCTION; bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, @@ -78,11 +78,11 @@ public: /// values. If not given, then only non-symbolic expressions will be /// evaluated. /// @result - True on success. + bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout, + const SectionAddrMap &Addrs) const; bool EvaluateAsAbsolute(int64_t &Res) const; bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const; bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const; - bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout, - const SectionAddrMap &Addrs) const; /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable /// value, i.e. an expression of the fixed form (a - b + constant). @@ -171,7 +171,8 @@ public: VK_ARM_GOTTPOFF, VK_ARM_TARGET1, - VK_PPC_TOC, + VK_PPC_TOC, // TOC base + VK_PPC_TOC_ENTRY, // TOC entry VK_PPC_DARWIN_HA16, // ha16(symbol) VK_PPC_DARWIN_LO16, // lo16(symbol) VK_PPC_GAS_HA16, // symbol@ha diff --git a/include/llvm/MC/MCLabel.h b/include/llvm/MC/MCLabel.h index 727520d4af..c72aabd03a 100644 --- a/include/llvm/MC/MCLabel.h +++ b/include/llvm/MC/MCLabel.h @@ -14,6 +14,8 @@ #ifndef LLVM_MC_MCLABEL_H #define LLVM_MC_MCLABEL_H +#include "llvm/Support/Compiler.h" + namespace llvm { class MCContext; class raw_ostream; @@ -30,8 +32,8 @@ namespace llvm { MCLabel(unsigned instance) : Instance(instance) {} - MCLabel(const MCLabel&); // DO NOT IMPLEMENT - void operator=(const MCLabel&); // DO NOT IMPLEMENT + MCLabel(const MCLabel&) LLVM_DELETED_FUNCTION; + void operator=(const MCLabel&) LLVM_DELETED_FUNCTION; public: /// getInstance - Get the current instance of this Directional Local Label. unsigned getInstance() const { return Instance; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index aef6303bf7..466773bc0a 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -88,6 +88,7 @@ public: virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, const MCSymbol *Label); virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitGPRel64Value(const MCExpr *Value); virtual void FinishImpl(); /// @} diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 9591a00946..14fe75fd4c 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -11,6 +11,7 @@ #define LLVM_MC_MCOBJECTWRITER_H #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include <cassert> @@ -35,8 +36,8 @@ class MCValue; /// The object writer also contains a number of helper methods for writing /// binary data to the output stream. 
class MCObjectWriter { - MCObjectWriter(const MCObjectWriter &); // DO NOT IMPLEMENT - void operator=(const MCObjectWriter &); // DO NOT IMPLEMENT + MCObjectWriter(const MCObjectWriter &) LLVM_DELETED_FUNCTION; + void operator=(const MCObjectWriter &) LLVM_DELETED_FUNCTION; protected: raw_ostream &OS; diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index 9a8735f3e7..e102dfb82c 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -31,8 +31,8 @@ class AsmLexer : public MCAsmLexer { const MemoryBuffer *CurBuf; bool isAtStartOfLine; - void operator=(const AsmLexer&); // DO NOT IMPLEMENT - AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT + void operator=(const AsmLexer&) LLVM_DELETED_FUNCTION; + AsmLexer(const AsmLexer&) LLVM_DELETED_FUNCTION; protected: /// LexToken - Read the next token and return its code. diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h index 5e29ad49dd..ca163c50e3 100644 --- a/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -11,6 +11,7 @@ #define LLVM_MC_MCASMLEXER_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/SMLoc.h" @@ -121,8 +122,8 @@ class MCAsmLexer { SMLoc ErrLoc; std::string Err; - MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT - void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT + MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; + void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; protected: // Can only create subclasses. const char *TokStart; diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 793c7097ba..c673a79bd4 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -35,8 +35,8 @@ public: typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc); private: - MCAsmParser(const MCAsmParser &); // DO NOT IMPLEMENT - void operator=(const MCAsmParser &); // DO NOT IMPLEMENT + MCAsmParser(const MCAsmParser &) LLVM_DELETED_FUNCTION; + void operator=(const MCAsmParser &) LLVM_DELETED_FUNCTION; MCTargetAsmParser *TargetParser; diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h index 4e2aee9928..59593a88e1 100644 --- a/include/llvm/MC/MCParser/MCAsmParserExtension.h +++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h @@ -21,8 +21,8 @@ class Twine; /// which is implemented by target and object file assembly parser /// implementations. 
class MCAsmParserExtension { - MCAsmParserExtension(const MCAsmParserExtension &); // DO NOT IMPLEMENT - void operator=(const MCAsmParserExtension &); // DO NOT IMPLEMENT + MCAsmParserExtension(const MCAsmParserExtension &) LLVM_DELETED_FUNCTION; + void operator=(const MCAsmParserExtension &) LLVM_DELETED_FUNCTION; MCAsmParser *Parser; diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 7da6534b6e..a92fc379e1 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -15,7 +15,7 @@ #define LLVM_MC_MCSECTION_H #include "llvm/MC/SectionKind.h" -#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" namespace llvm { class MCAsmInfo; @@ -33,8 +33,8 @@ namespace llvm { }; private: - MCSection(const MCSection&); // DO NOT IMPLEMENT - void operator=(const MCSection&); // DO NOT IMPLEMENT + MCSection(const MCSection&) LLVM_DELETED_FUNCTION; + void operator=(const MCSection&) LLVM_DELETED_FUNCTION; protected: MCSection(SectionVariant V, SectionKind K) : Variant(V), Kind(K) {} SectionVariant Variant; diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 970bf4626b..391e1d59e8 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -47,8 +47,8 @@ namespace llvm { class MCStreamer { MCContext &Context; - MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT - MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT + MCStreamer(const MCStreamer&) LLVM_DELETED_FUNCTION; + MCStreamer &operator=(const MCStreamer&) LLVM_DELETED_FUNCTION; bool EmitEHFrame; bool EmitDebugFrame; @@ -602,9 +602,6 @@ namespace llvm { /// /// \param ShowInst - Whether to show the MCInst representation inline with /// the assembly. - /// - /// \param DecodeLSDA - If true, emit comments that translates the LSDA into a - /// human readable format. Only usable with CFI. MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h index 31d632de60..6c96f49716 100644 --- a/include/llvm/MC/MCSubtargetInfo.h +++ b/include/llvm/MC/MCSubtargetInfo.h @@ -72,7 +72,7 @@ public: /// getSchedModelForCPU - Get the machine model of a CPU. /// - MCSchedModel *getSchedModelForCPU(StringRef CPU) const; + const MCSchedModel *getSchedModelForCPU(StringRef CPU) const; /// getInstrItineraryForCPU - Get scheduling itinerary of a CPU. /// diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index 0583ce5682..4c9e7f5ffd 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -15,6 +15,7 @@ #define LLVM_MC_MCSYMBOL_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" namespace llvm { class MCExpr; @@ -62,8 +63,8 @@ namespace llvm { : Name(name), Section(0), Value(0), IsTemporary(isTemporary), IsUsed(false) {} - MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT - void operator=(const MCSymbol&); // DO NOT IMPLEMENT + MCSymbol(const MCSymbol&) LLVM_DELETED_FUNCTION; + void operator=(const MCSymbol&) LLVM_DELETED_FUNCTION; public: /// getName - Get the symbol name. 
StringRef getName() const { return Name; } diff --git a/include/llvm/MC/MCTargetAsmLexer.h b/include/llvm/MC/MCTargetAsmLexer.h index f5c8c09df0..d09fe0498e 100644 --- a/include/llvm/MC/MCTargetAsmLexer.h +++ b/include/llvm/MC/MCTargetAsmLexer.h @@ -24,8 +24,8 @@ class MCTargetAsmLexer { SMLoc ErrLoc; std::string Err; - MCTargetAsmLexer(const MCTargetAsmLexer &); // DO NOT IMPLEMENT - void operator=(const MCTargetAsmLexer &); // DO NOT IMPLEMENT + MCTargetAsmLexer(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION; + void operator=(const MCTargetAsmLexer &) LLVM_DELETED_FUNCTION; protected: // Can only create subclasses. MCTargetAsmLexer(const Target &); diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h index 91b604b716..709c2d245c 100644 --- a/include/llvm/MC/MCTargetAsmParser.h +++ b/include/llvm/MC/MCTargetAsmParser.h @@ -25,7 +25,6 @@ template <typename T> class SmallVectorImpl; class MCTargetAsmParser : public MCAsmParserExtension { public: enum MatchResultTy { - Match_ConversionFail, Match_InvalidOperand, Match_MissingFeature, Match_MnemonicFail, @@ -34,8 +33,8 @@ public: }; private: - MCTargetAsmParser(const MCTargetAsmParser &); // DO NOT IMPLEMENT - void operator=(const MCTargetAsmParser &); // DO NOT IMPLEMENT + MCTargetAsmParser(const MCTargetAsmParser &) LLVM_DELETED_FUNCTION; + void operator=(const MCTargetAsmParser &) LLVM_DELETED_FUNCTION; protected: // Can only create subclasses. MCTargetAsmParser(); @@ -86,7 +85,7 @@ public: /// On failure, the target parser is responsible for emitting a diagnostic /// explaining the match failure. virtual bool - MatchInstruction(SMLoc IDLoc, + MatchInstruction(SMLoc IDLoc, unsigned &Kind, SmallVectorImpl<MCParsedAsmOperand*> &Operands, SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, @@ -112,6 +111,10 @@ public: return Match_Success; } + virtual unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, + unsigned &NumMCOperands) = 0; }; } // End llvm namespace diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index 507d882775..87c5fd3969 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -50,7 +50,7 @@ struct SubtargetFeatureKV { // struct SubtargetInfoKV { const char *Key; // K-V key string - void *Value; // K-V pointer value + const void *Value; // K-V pointer value // Compare routine for std binary search bool operator<(const SubtargetInfoKV &S) const { @@ -96,8 +96,8 @@ public: size_t FeatureTableSize); /// Get scheduling itinerary of a CPU. - void *getItinerary(const StringRef CPU, - const SubtargetInfoKV *Table, size_t TableSize); + const void *getItinerary(const StringRef CPU, + const SubtargetInfoKV *Table, size_t TableSize); /// Print feature string. 
void print(raw_ostream &OS) const; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 5b95557360..d672e96f5c 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -1444,6 +1444,143 @@ error_code ELFObjectFile<target_endianness, is64Bits> res = "Unknown"; } break; + case ELF::EM_ARM: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PC24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ABS5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BREL_ADJ); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_SWI8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_XPC25); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_XPC22); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_7_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_15_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_23_15); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SBREL_11_0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_19_12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_27_20_CK); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL31); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_V4BX); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PREL31); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_ABS_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_PREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_ABS_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_PREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ALU_PREL_11_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32_NOI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32_NOI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GOTDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESCSEQ); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTRELAX); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTENTRY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTINHERIT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP11); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDM32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE12GP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_4); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_7); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_9); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_10); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_11); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_13); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_15); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ME_TOO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ32); + default: + res = "Unknown"; + } + break; case ELF::EM_HEXAGON: switch (type) { LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE); @@ -1574,15 +1711,15 @@ error_code ELFObjectFile<target_endianness, is64Bits> int64_t addend = 0; uint16_t symbol_index = 0; switch (sec->sh_type) { - default : + default: return object_error::parse_failed; - case ELF::SHT_REL : { + case ELF::SHT_REL: { type = getRel(Rel)->getType(); symbol_index = getRel(Rel)->getSymbol(); // TODO: Read implicit addend from section data. break; } - case ELF::SHT_RELA : { + case ELF::SHT_RELA: { type = getRela(Rel)->getType(); symbol_index = getRela(Rel)->getSymbol(); addend = getRela(Rel)->r_addend; @@ -1596,9 +1733,8 @@ error_code ELFObjectFile<target_endianness, is64Bits> switch (Header->e_machine) { case ELF::EM_X86_64: switch (type) { - case ELF::R_X86_64_32S: - res = symname; - break; + case ELF::R_X86_64_PC8: + case ELF::R_X86_64_PC16: case ELF::R_X86_64_PC32: { std::string fmtbuf; raw_string_ostream fmt(fmtbuf); @@ -1607,10 +1743,23 @@ error_code ELFObjectFile<target_endianness, is64Bits> Result.append(fmtbuf.begin(), fmtbuf.end()); } break; + case ELF::R_X86_64_8: + case ELF::R_X86_64_16: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + case ELF::R_X86_64_64: { + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + fmt << symname << (addend < 0 ? "" : "+") << addend; + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); + } + break; default: res = "Unknown"; } break; + case ELF::EM_ARM: case ELF::EM_HEXAGON: res = symname; break; diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index cf7125173e..8c389afa80 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -72,6 +72,10 @@ template <size_t Alignment> struct AlignedCharArrayImpl {}; template <> struct AlignedCharArrayImpl<0> { typedef char type; }; + +// MSVC requires special handling here. +#ifndef _MSC_VER + #if __has_feature(cxx_alignas) #define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ template <> struct AlignedCharArrayImpl<x> { \ @@ -82,11 +86,6 @@ template <> struct AlignedCharArrayImpl<0> { template <> struct AlignedCharArrayImpl<x> { \ typedef char type __attribute__((aligned(x))); \ } -#elif defined(_MSC_VER) -#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ - template <> struct AlignedCharArrayImpl<x> { \ - typedef __declspec(align(x)) char type; \ - } #else # error No supported align as directive. #endif @@ -104,9 +103,38 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1024); LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2048); LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4096); LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192); + +#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT + +#else // _MSC_VER + +// We provide special variations of this template for the most common +// alignments because __declspec(align(...)) doesn't actually work when it is +// a member of a by-value function argument in MSVC, even if the alignment +// request is something reasonably like 8-byte or 16-byte. 
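As an illustration (hypothetical usage, not part of this header's documented interface), the specializations that follow make storage declarations such as

    llvm::AlignedCharArrayImpl<8>::type Buf; // aligned like a double

safe even inside by-value function arguments, where a direct __declspec(align(8)) member would be mishandled by MSVC.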
+template <> struct AlignedCharArrayImpl<1> { typedef char type; }; +template <> struct AlignedCharArrayImpl<2> { typedef short type; }; +template <> struct AlignedCharArrayImpl<4> { typedef int type; }; +template <> struct AlignedCharArrayImpl<8> { typedef double type; }; + +#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ + template <> struct AlignedCharArrayImpl<x> { \ + typedef __declspec(align(x)) char type; \ + } +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(512); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1024); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2048); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4096); +LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192); // Any larger and MSVC complains. #undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT +#endif // _MSC_VER + /// \brief This union template exposes a suitably aligned and sized character /// array member which can hold elements of any of up to four types. /// diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index c23bb6a97d..8905e1e33b 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -81,7 +81,7 @@ public: InstrTy *operator->() const { return I.getPointer(); } operator bool() const { return I.getPointer(); } - /// getCalledValue - Return the pointer to function that is being called... + /// getCalledValue - Return the pointer to function that is being called. /// ValTy *getCalledValue() const { assert(getInstruction() && "Not a call or invoke instruction!"); @@ -95,7 +95,7 @@ public: return dyn_cast<FunTy>(getCalledValue()); } - /// setCalledFunction - Set the callee to the specified value... + /// setCalledFunction - Set the callee to the specified value. /// void setCalledFunction(Value *V) { assert(getInstruction() && "Not a call or invoke instruction!"); @@ -130,7 +130,7 @@ public: } /// arg_iterator - The type of iterator to use when looping over actual - /// arguments at this call site... + /// arguments at this call site. typedef IterTy arg_iterator; /// arg_begin/arg_end - Return iterators corresponding to the actual argument diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index ea0a4da27f..1136ff70c3 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -24,7 +24,7 @@ /// does not imply the existence of any other C++ library features. #if (__has_feature(cxx_rvalue_references) \ || defined(__GXX_EXPERIMENTAL_CXX0X__) \ - || _MSC_VER >= 1600) + || (defined(_MSC_VER) && _MSC_VER >= 1600)) #define LLVM_USE_RVALUE_REFERENCES 1 #else #define LLVM_USE_RVALUE_REFERENCES 0 @@ -106,9 +106,11 @@ #endif #if (__GNUC__ >= 4) -#define BUILTIN_EXPECT(EXPR, VALUE) __builtin_expect((EXPR), (VALUE)) +#define LLVM_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true) +#define LLVM_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false) #else -#define BUILTIN_EXPECT(EXPR, VALUE) (EXPR) +#define LLVM_LIKELY(EXPR) (EXPR) +#define LLVM_UNLIKELY(EXPR) (EXPR) #endif diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h index 506ec96930..8d880fd5e8 100644 --- a/include/llvm/Support/DataExtractor.h +++ b/include/llvm/Support/DataExtractor.h @@ -99,8 +99,8 @@ public: /// enough bytes to extract this value, the offset will be left /// unmodified. 
/// - /// @param[in] byte_size - /// The size in byte of the integer to extract. + /// @param[in] size + /// The size in bytes of the integer to extract. /// /// @return /// The sign extended signed integer value that was extracted, diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index f4a9aa0e89..5d6020502d 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -40,7 +40,7 @@ #include <string> #include <vector> -#if HAVE_SYS_STAT_H +#ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif @@ -432,7 +432,7 @@ error_code is_other(const Twine &path, bool &result); /// @brief Does status represent a symlink? /// /// @param status A file_status previously returned from stat. -/// @param result status.type() == symlink_file. +/// @returns status.type() == symlink_file. bool is_symlink(file_status status); /// @brief Is path a symlink? @@ -461,7 +461,7 @@ error_code permissions(const Twine &path, perms prms); /// @brief Is status available? /// -/// @param path Input path. +/// @param s Input file status. /// @results True if status() != status_error. bool status_known(file_status s); @@ -486,7 +486,7 @@ error_code status_known(const Twine &path, bool &result); /// clang-%%-%%-%%-%%-%%.s => /tmp/clang-a0-b1-c2-d3-e4.s /// /// @param model Name to base unique path off of. -/// @param result_fs Set to the opened file's file descriptor. +/// @param result_fd Set to the opened file's file descriptor. /// @param result_path Set to the opened file's absolute path. /// @param makeAbsolute If true and @model is not an absolute path, a temp /// directory will be prepended. @@ -586,9 +586,9 @@ class mapped_file_region { public: enum mapmode { - readonly, //< May only access map via const_data as read only. - readwrite, //< May access map via data and modify it. Written to path. - priv //< May modify via data, but changes are lost on destruction. + readonly, ///< May only access map via const_data as read only. + readwrite, ///< May access map via data and modify it. Written to path. + priv ///< May modify via data, but changes are lost on destruction. }; private: @@ -596,7 +596,7 @@ private: mapmode Mode; uint64_t Size; void *Mapping; -#if LLVM_ON_WIN32 +#ifdef LLVM_ON_WIN32 int FileDescriptor; void *FileHandle; void *FileMappingHandle; @@ -658,7 +658,7 @@ public: /// /// @param path Path to file to map. /// @param file_offset Byte offset in file where mapping should begin. -/// @param size_t Byte length of range of the file to map. +/// @param size Byte length of range of the file to map. /// @param map_writable If true, the file will be mapped in r/w such /// that changes to the mapped buffer will be flushed back /// to the file. If false, the file will be mapped read-only diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h index 19e1ce89cb..e552315f45 100644 --- a/include/llvm/Support/GCOV.h +++ b/include/llvm/Support/GCOV.h @@ -27,13 +27,15 @@ class GCOVBlock; class GCOVLines; class FileInfo; -enum GCOVFormat { - InvalidGCOV, - GCNO_402, - GCNO_404, - GCDA_402, - GCDA_404 -}; +namespace GCOV { + enum GCOVFormat { + InvalidGCOV, + GCNO_402, + GCNO_404, + GCDA_402, + GCDA_404 + }; +} // end GCOV namespace /// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific /// read operations. @@ -42,20 +44,20 @@ public: GCOVBuffer(MemoryBuffer *B) : Buffer(B), Cursor(0) {} /// readGCOVFormat - Read GCOV signature at the beginning of buffer. 
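(A note on the string comparisons below: each magic value appears byte-reversed, e.g. "oncg*404MVLL" is the 32-bit words "gcno", "404*", "LLVM" with the bytes of each word swapped, reflecting the little-endian on-disk layout of the words.)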
- enum GCOVFormat readGCOVFormat() { + GCOV::GCOVFormat readGCOVFormat() { StringRef Magic = Buffer->getBuffer().slice(0, 12); Cursor = 12; if (Magic == "oncg*404MVLL") - return GCNO_404; + return GCOV::GCNO_404; else if (Magic == "oncg*204MVLL") - return GCNO_402; + return GCOV::GCNO_402; else if (Magic == "adcg*404MVLL") - return GCDA_404; + return GCOV::GCDA_404; else if (Magic == "adcg*204MVLL") - return GCDA_402; + return GCOV::GCDA_402; Cursor = 0; - return InvalidGCOV; + return GCOV::InvalidGCOV; } /// readFunctionTag - If cursor points to a function tag then increment the @@ -128,7 +130,7 @@ public: StringRef Str = Buffer->getBuffer().slice(Cursor, Cursor+4); assert (Str.empty() == false && "Unexpected memory buffer end!"); Cursor += 4; - Result = *(uint32_t *)(Str.data()); + Result = *(const uint32_t *)(Str.data()); return Result; } @@ -170,7 +172,7 @@ class GCOVFunction { public: GCOVFunction() : Ident(0), LineNumber(0) {} ~GCOVFunction(); - bool read(GCOVBuffer &Buffer, GCOVFormat Format); + bool read(GCOVBuffer &Buffer, GCOV::GCOVFormat Format); void dump(); void collectLineCounts(FileInfo &FI); private: diff --git a/include/llvm/Support/IntegersSubsetMapping.h b/include/llvm/Support/IntegersSubsetMapping.h index cab18dce15..7635d5e912 100644 --- a/include/llvm/Support/IntegersSubsetMapping.h +++ b/include/llvm/Support/IntegersSubsetMapping.h @@ -42,6 +42,7 @@ public: struct RangeEx : public RangeTy { RangeEx() : Weight(1) {} RangeEx(const RangeTy &R) : RangeTy(R), Weight(1) {} + RangeEx(const RangeTy &R, unsigned W) : RangeTy(R), Weight(W) {} RangeEx(const IntTy &C) : RangeTy(C), Weight(1) {} RangeEx(const IntTy &L, const IntTy &H) : RangeTy(L, H), Weight(1) {} RangeEx(const IntTy &L, const IntTy &H, unsigned W) : @@ -316,13 +317,13 @@ public: Items.clear(); const IntTy *Low = &OldItems.begin()->first.getLow(); const IntTy *High = &OldItems.begin()->first.getHigh(); - unsigned Weight = 1; + unsigned Weight = OldItems.begin()->first.Weight; SuccessorClass *Successor = OldItems.begin()->second; for (CaseItemIt j = OldItems.begin(), i = j++, e = OldItems.end(); j != e; i = j++) { if (isJoinable(i, j)) { const IntTy *CurHigh = &j->first.getHigh(); - ++Weight; + Weight += j->first.Weight; if (*CurHigh > *High) High = CurHigh; } else { @@ -330,7 +331,7 @@ public: add(R, Successor); Low = &j->first.getLow(); High = &j->first.getHigh(); - Weight = 1; + Weight = j->first.Weight; Successor = j->second; } } @@ -362,10 +363,17 @@ public: /// Adds all ranges and values from given ranges set to the current /// mapping. - void add(const IntegersSubsetTy &CRS, SuccessorClass *S = 0) { + void add(const IntegersSubsetTy &CRS, SuccessorClass *S = 0, + unsigned Weight = 0) { + unsigned ItemWeight = 1; + if (Weight) + // Weight is associated with CRS, for now we perform a division to + // get the weight for each item. + ItemWeight = Weight / CRS.getNumItems(); for (unsigned i = 0, e = CRS.getNumItems(); i < e; ++i) { RangeTy R = CRS.getItem(i); - add(R, S); + RangeEx REx(R, ItemWeight); + add(REx, S); } } diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 4005161320..35c2694cff 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -463,12 +463,24 @@ template <unsigned B> inline int32_t SignExtend32(uint32_t x) { return int32_t(x << (32 - B)) >> (32 - B); } +/// \brief Sign extend number in the bottom B bits of X to a 32-bit int. +/// Requires 0 < B <= 32. 
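For example, with a 5-bit field the sign bit is bit 4, so

    SignExtend32(0x1F, 5) == -1   // 0b11111 extends to all ones
    SignExtend32(0x0F, 5) == 15   // 0b01111 stays positive

using the same shift-left/arithmetic-shift-right trick as the templated overload above.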
+inline int32_t SignExtend32(uint32_t X, unsigned B) { + return int32_t(X << (32 - B)) >> (32 - B); +} + /// SignExtend64 - Sign extend B-bit number x to 64-bit int. /// Usage int64_t r = SignExtend64<5>(x); template <unsigned B> inline int64_t SignExtend64(uint64_t x) { return int64_t(x << (64 - B)) >> (64 - B); } +/// \brief Sign extend number in the bottom B bits of X to a 64-bit int. +/// Requires 0 < B <= 64. +inline int64_t SignExtend64(uint64_t X, unsigned B) { + return int64_t(X << (64 - B)) >> (64 - B); +} + } // End llvm namespace #endif diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h index f4bedf92c4..643ee8c6c1 100644 --- a/include/llvm/Support/PathV1.h +++ b/include/llvm/Support/PathV1.h @@ -683,8 +683,8 @@ namespace sys { /// This function returns status information about the file. The type of /// path (file or directory) is updated to reflect the actual contents /// of the file system. - /// @returns 0 on failure, with Error explaining why (if non-zero) - /// @returns a pointer to a FileStatus structure on success. + /// @returns 0 on failure, with Error explaining why (if non-zero), + /// otherwise returns a pointer to a FileStatus structure on success. /// @brief Get file status. const FileStatus *getFileStatus( bool forceUpdate = false, ///< Force an update from the file system diff --git a/include/llvm/Support/PathV2.h b/include/llvm/Support/PathV2.h index 8d797097a8..967ea1e1d1 100644 --- a/include/llvm/Support/PathV2.h +++ b/include/llvm/Support/PathV2.h @@ -133,7 +133,7 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension); /// foo + bar/f => foo/bar/f /// /// @param path Set to \a path + \a component. -/// @param component The component to be appended to \a path. +/// @param a The component to be appended to \a path. void append(SmallVectorImpl<char> &path, const Twine &a, const Twine &b = "", const Twine &c = "", @@ -272,7 +272,7 @@ bool is_separator(char value); /// ignored if the user or system has set the typical environment variable /// (e.g., TEMP on Windows, TMPDIR on *nix) to specify a temporary directory. /// -/// @param Result Holds the resulting path name. +/// @param result Holds the resulting path name. void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result); /// @brief Has root name? diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index 8949a3a908..3835e84592 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -145,7 +145,7 @@ public: /// GetMessage - Return an SMDiagnostic at the specified location with the /// specified string. /// - /// @param Type - If non-null, the kind of message (e.g., "error") which is + /// @param Msg If non-null, the kind of message (e.g., "error") which is /// prefixed to the message. SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const; diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h index 94f132a05c..e780b50c60 100644 --- a/include/llvm/Support/TimeValue.h +++ b/include/llvm/Support/TimeValue.h @@ -153,7 +153,6 @@ namespace sys { /// Determine if \p this is greater than or equal to \p that. /// @returns True iff *this >= that. - /// @brief True if this >= that. int operator >= (const TimeValue &that) const { if ( this->seconds_ > that.seconds_ ) { return 1; @@ -164,8 +163,7 @@ namespace sys { } /// Determines if two TimeValue objects represent the same moment in time. 
- /// @brief True iff *this == that. - /// @brief True if this == that. + /// @returns True iff *this == that. int operator == (const TimeValue &that) const { return (this->seconds_ == that.seconds_) && (this->nanos_ == that.nanos_); @@ -173,8 +171,7 @@ namespace sys { /// Determines if two TimeValue objects represent times that are not the /// same. - /// @return True iff *this != that. - /// @brief True if this != that. + /// @returns True iff *this != that. int operator != (const TimeValue &that) const { return !(*this == that); } /// Adds two TimeValue objects together. diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 5de749aeae..9913f989fc 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -210,13 +210,16 @@ public: /// Changes the foreground color of text that will be output from this point /// forward. - /// @param colors ANSI color to use, the special SAVEDCOLOR can be used to + /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to /// change only the bold attribute, and keep colors untouched - /// @param bold bold/brighter text, default false - /// @param bg if true change the background, default: change foreground + /// @param Bold bold/brighter text, default false + /// @param BG if true change the background, default: change foreground /// @returns itself so it can be used within << invocations - virtual raw_ostream &changeColor(enum Colors, bool = false, bool = false) { - return *this; } + virtual raw_ostream &changeColor(enum Colors Color, + bool Bold = false, + bool BG = false) { + return *this; + } /// Resets the colors to terminal defaults. Call this when you are done /// outputting colored text, or before program exit. diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/SymbolTableListTraits.h index 91a4eb99ff..ec5c88f5c8 100644 --- a/include/llvm/SymbolTableListTraits.h +++ b/include/llvm/SymbolTableListTraits.h @@ -46,7 +46,6 @@ public: /// getListOwner - Return the object that owns this list. If this is a list /// of instructions, it returns the BasicBlock that owns them. 
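(The implementation that follows recovers the owner without storing a back-pointer: it computes the byte offset of the sublist member inside ItemParentClass by applying getSublistAccess's member pointer to a null object, then subtracts that offset from this list's own address.)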
ItemParentClass *getListOwner() { - typedef iplist<ValueSubClass> ItemParentClass::*Sublist; size_t Offset(size_t(&((ItemParentClass*)0->*ItemParentClass:: getSublistAccess(static_cast<ValueSubClass*>(0))))); iplist<ValueSubClass>* Anchor(static_cast<iplist<ValueSubClass>*>(this)); diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index fd5f805ffc..5c1c3adf7e 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -20,21 +20,22 @@ namespace llvm { class TGError { - SMLoc Loc; + SmallVector<SMLoc, 4> Locs; std::string Message; public: - TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {} + TGError(ArrayRef<SMLoc> locs, const std::string &message) + : Locs(locs.begin(), locs.end()), Message(message) {} - SMLoc getLoc() const { return Loc; } + ArrayRef<SMLoc> getLoc() const { return Locs; } const std::string &getMessage() const { return Message; } }; -void PrintWarning(SMLoc WarningLoc, const Twine &Msg); +void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg); void PrintWarning(const char *Loc, const Twine &Msg); void PrintWarning(const Twine &Msg); void PrintWarning(const TGError &Warning); -void PrintError(SMLoc ErrorLoc, const Twine &Msg); +void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); void PrintError(const char *Loc, const Twine &Msg); void PrintError(const Twine &Msg); void PrintError(const TGError &Error); diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index a8256b7735..c7baaeb9d9 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -509,6 +509,18 @@ public: virtual Init *resolveReferences(Record &R, const RecordVal *RV) const { return const_cast<Init *>(this); } + + /// getBit - This method is used to return the initializer for the specified + /// bit. + virtual Init *getBit(unsigned Bit) const = 0; + + /// getBitVar - This method is used to retrieve the initializer for a bit + /// reference. For non-VarBitInit, it simply returns itself. + virtual Init *getBitVar() const { return const_cast<Init*>(this); } + + /// getBitNum - This method is used to retrieve the bit number of a bit + /// reference. For non-VarBitInit, it simply returns 0. + virtual unsigned getBitNum() const { return 0; } }; inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) { @@ -541,13 +553,6 @@ public: /// virtual RecTy *getFieldType(const std::string &FieldName) const; - /// resolveBitReference - This method is used to implement - /// VarBitInit::resolveReferences. If the bit is able to be resolved, we - /// simply return the resolved value, otherwise we return null. - /// - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const = 0; - /// resolveListElementReference - This method is used to implement /// VarListElementInit::resolveReferences. If the list element is resolvable /// now, we return the resolved value, otherwise we return null. @@ -571,6 +576,10 @@ public: return Ty->convertValue(const_cast<UnsetInit *>(this)); } + virtual Init *getBit(unsigned Bit) const { + return const_cast<UnsetInit*>(this); + } + virtual bool isComplete() const { return false; } virtual std::string getAsString() const { return "?"; } }; @@ -595,6 +604,11 @@ public: return Ty->convertValue(const_cast<BitInit *>(this)); } + virtual Init *getBit(unsigned Bit) const { + assert(Bit < 1 && "Bit index out of range!"); + return const_cast<BitInit*>(this); + } + virtual std::string getAsString() const { return Value ?
"1" : "0"; } }; @@ -616,11 +630,6 @@ public: unsigned getNumBits() const { return Bits.size(); } - Init *getBit(unsigned Bit) const { - assert(Bit < Bits.size() && "Bit index out of range!"); - return Bits[Bit]; - } - virtual Init *convertInitializerTo(RecTy *Ty) const { return Ty->convertValue(const_cast<BitsInit *>(this)); } @@ -640,6 +649,11 @@ public: virtual std::string getAsString() const; virtual Init *resolveReferences(Record &R, const RecordVal *RV) const; + + virtual Init *getBit(unsigned Bit) const { + assert(Bit < Bits.size() && "Bit index out of range!"); + return Bits[Bit]; + } }; @@ -666,15 +680,6 @@ public: virtual std::string getAsString() const; - /// resolveBitReference - This method is used to implement - /// VarBitInit::resolveReferences. If the bit is able to be resolved, we - /// simply return the resolved value, otherwise we return null. - /// - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - llvm_unreachable("Illegal bit reference off int"); - } - /// resolveListElementReference - This method is used to implement /// VarListElementInit::resolveReferences. If the list element is resolvable /// now, we return the resolved value, otherwise we return null. @@ -682,6 +687,10 @@ public: unsigned Elt) const { llvm_unreachable("Illegal element reference off int"); } + + virtual Init *getBit(unsigned Bit) const { + return BitInit::get((Value & (1 << Bit)) != 0); + } }; @@ -709,15 +718,6 @@ public: virtual std::string getAsString() const { return "\"" + Value + "\""; } virtual std::string getAsUnquotedString() const { return Value; } - /// resolveBitReference - This method is used to implement - /// VarBitInit::resolveReferences. If the bit is able to be resolved, we - /// simply return the resolved value, otherwise we return null. - /// - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - llvm_unreachable("Illegal bit reference off string"); - } - /// resolveListElementReference - This method is used to implement /// VarListElementInit::resolveReferences. If the list element is resolvable /// now, we return the resolved value, otherwise we return null. @@ -725,6 +725,10 @@ public: unsigned Elt) const { llvm_unreachable("Illegal element reference off string"); } + + virtual Init *getBit(unsigned Bit) const { + llvm_unreachable("Illegal bit reference off string"); + } }; /// ListInit - [AL, AH, CL] - Represent a list of defs @@ -777,20 +781,15 @@ public: inline size_t size () const { return Values.size(); } inline bool empty() const { return Values.empty(); } - /// resolveBitReference - This method is used to implement - /// VarBitInit::resolveReferences. If the bit is able to be resolved, we - /// simply return the resolved value, otherwise we return null. - /// - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - llvm_unreachable("Illegal bit reference off list"); - } - /// resolveListElementReference - This method is used to implement /// VarListElementInit::resolveReferences. If the list element is resolvable /// now, we return the resolved value, otherwise we return null. 
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const; + + virtual Init *getBit(unsigned Bit) const { + llvm_unreachable("Illegal bit reference off list"); + } }; @@ -818,10 +817,10 @@ public: return Ty->convertValue(const_cast<OpInit *>(this)); } - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const; virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const; + + virtual Init *getBit(unsigned Bit) const; }; @@ -1003,8 +1002,6 @@ public: return getNameInit()->getAsUnquotedString(); } - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const; virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const; @@ -1019,6 +1016,8 @@ public: /// virtual Init *resolveReferences(Record &R, const RecordVal *RV) const; + virtual Init *getBit(unsigned Bit) const; + virtual std::string getAsString() const { return getName(); } }; @@ -1030,8 +1029,10 @@ class VarBitInit : public Init { unsigned Bit; VarBitInit(TypedInit *T, unsigned B) : TI(T), Bit(B) { - assert(T->getType() && dynamic_cast<BitsRecTy*>(T->getType()) && - ((BitsRecTy*)T->getType())->getNumBits() > B && + assert(T->getType() && + (dynamic_cast<IntRecTy*>(T->getType()) || + (dynamic_cast<BitsRecTy*>(T->getType()) && + dynamic_cast<BitsRecTy*>(T->getType())->getNumBits() > B)) && "Illegal VarBitInit expression!"); } @@ -1045,11 +1046,16 @@ public: return Ty->convertValue(const_cast<VarBitInit *>(this)); } - TypedInit *getVariable() const { return TI; } - unsigned getBitNum() const { return Bit; } + virtual Init *getBitVar() const { return TI; } + virtual unsigned getBitNum() const { return Bit; } virtual std::string getAsString() const; virtual Init *resolveReferences(Record &R, const RecordVal *RV) const; + + virtual Init *getBit(unsigned B) const { + assert(B < 1 && "Bit index out of range!"); + return const_cast<VarBitInit*>(this); + } }; /// VarListElementInit - List[4] - Represent access to one element of a var or @@ -1080,9 +1086,6 @@ public: TypedInit *getVariable() const { return TI; } unsigned getElementNum() const { return Element; } - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const; - /// resolveListElementReference - This method is used to implement /// VarListElementInit::resolveReferences. If the list element is resolvable /// now, we return the resolved value, otherwise we return null. @@ -1092,6 +1095,8 @@ public: virtual std::string getAsString() const; virtual Init *resolveReferences(Record &R, const RecordVal *RV) const; + + virtual Init *getBit(unsigned Bit) const; }; /// DefInit - AL - Represent a reference to a 'def' in the description @@ -1122,12 +1127,7 @@ public: virtual std::string getAsString() const; - /// resolveBitReference - This method is used to implement - /// VarBitInit::resolveReferences. If the bit is able to be resolved, we - /// simply return the resolved value, otherwise we return null. 
- /// - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { + virtual Init *getBit(unsigned Bit) const { llvm_unreachable("Illegal bit reference off def"); } @@ -1163,8 +1163,8 @@ public: return Ty->convertValue(const_cast<FieldInit *>(this)); } - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const; + virtual Init *getBit(unsigned Bit) const; + virtual Init *resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const; @@ -1243,8 +1243,7 @@ public: inline size_t name_size () const { return ArgNames.size(); } inline bool name_empty() const { return ArgNames.empty(); } - virtual Init *resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { + virtual Init *getBit(unsigned Bit) const { llvm_unreachable("Illegal bit reference off dag"); } @@ -1301,7 +1300,9 @@ class Record { // Unique record ID. unsigned ID; Init *Name; - SMLoc Loc; + // Location where record was instantiated, followed by the location of + // multiclass prototypes used. + SmallVector<SMLoc, 4> Locs; std::vector<Init *> TemplateArgs; std::vector<RecordVal> Values; std::vector<Record*> SuperClasses; @@ -1317,13 +1318,15 @@ class Record { public: // Constructs a record. - explicit Record(const std::string &N, SMLoc loc, RecordKeeper &records) : - ID(LastID++), Name(StringInit::get(N)), Loc(loc), TrackedRecords(records), - TheInit(0) { + explicit Record(const std::string &N, ArrayRef<SMLoc> locs, + RecordKeeper &records) : + ID(LastID++), Name(StringInit::get(N)), Locs(locs.begin(), locs.end()), + TrackedRecords(records), TheInit(0) { init(); } - explicit Record(Init *N, SMLoc loc, RecordKeeper &records) : - ID(LastID++), Name(N), Loc(loc), TrackedRecords(records), TheInit(0) { + explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records) : + ID(LastID++), Name(N), Locs(locs.begin(), locs.end()), + TrackedRecords(records), TheInit(0) { init(); } ~Record() {} @@ -1345,7 +1348,7 @@ public: void setName(Init *Name); // Also updates RecordKeeper. void setName(const std::string &Name); // Also updates RecordKeeper. - SMLoc getLoc() const { return Loc; } + ArrayRef<SMLoc> getLoc() const { return Locs; } /// get the corresponding DefInit. DefInit *getDefInit(); @@ -1507,6 +1510,12 @@ public: /// bool getValueAsBit(StringRef FieldName) const; + /// getValueAsBitOrUnset - This method looks up the specified field and + /// returns its value as a bit. If the field is unset, sets Unset to true and + /// returns false. + /// + bool getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const; + /// getValueAsInt - This method looks up the specified field and returns its /// value as an int64_t, throwing an exception if the field does not exist or /// if the value is not the right type. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index e5ae9bec73..09f6929bd2 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -343,8 +343,8 @@ class Instruction { bit isBarrier = 0; // Can control flow fall through this instruction? bit isCall = 0; // Is this instruction a call instruction? bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? - bit mayLoad = 0; // Is it possible for this inst to read memory? - bit mayStore = 0; // Is it possible for this inst to write memory? + bit mayLoad = ?; // Is it possible for this inst to read memory? + bit mayStore = ?; // Is it possible for this inst to write memory?
bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? @@ -369,7 +369,7 @@ class Instruction { // // neverHasSideEffects - Set on an instruction with no pattern if it has no // side effects. - bit hasSideEffects = 0; + bit hasSideEffects = ?; bit neverHasSideEffects = 0; // Is this instruction a "real" instruction (with a distinct machine @@ -602,23 +602,31 @@ def f64imm : Operand<f64>; /// def zero_reg; +/// OperandWithDefaultOps - This Operand class can be used as the parent class +/// for an Operand that needs to be initialized with a default value if +/// no value is supplied in a pattern. This class can be used to simplify the +/// pattern definitions for instructions that have target specific flags +/// encoded as immediate operands. +class OperandWithDefaultOps<ValueType ty, dag defaultops> + : Operand<ty> { + dag DefaultOps = defaultops; +} + /// PredicateOperand - This can be used to define a predicate operand for an /// instruction. OpTypes specifies the MIOperandInfo for the operand, and /// AlwaysVal specifies the value of this predicate when set to "always /// execute". class PredicateOperand<ValueType ty, dag OpTypes, dag AlwaysVal> - : Operand<ty> { + : OperandWithDefaultOps<ty, AlwaysVal> { let MIOperandInfo = OpTypes; - dag DefaultOps = AlwaysVal; } /// OptionalDefOperand - This is used to define an optional definition operand /// for an instruction. DefaultOps is the register the operand represents if /// none is supplied, e.g. zero_reg. class OptionalDefOperand<ValueType ty, dag OpTypes, dag defaultops> - : Operand<ty> { + : OperandWithDefaultOps<ty, defaultops> { let MIOperandInfo = OpTypes; - dag DefaultOps = defaultops; } @@ -631,6 +639,17 @@ class InstrInfo { // Sparc manual specifies its instructions in the format [31..0] (big), while // PowerPC specifies them using the format [0..31] (little). bit isLittleEndianEncoding = 0; + + // The instruction properties mayLoad, mayStore, and hasSideEffects are unset + // by default, and TableGen will infer their value from the instruction + // pattern when possible. + // + // Normally, TableGen will issue an error if it can't infer the value of a + // property that hasn't been set explicitly. When guessInstructionProperties + // is set, it will guess a safe value instead. + // + // This option is a temporary migration help. It will go away. + bit guessInstructionProperties = 1; } // Standard Pseudo Instructions. @@ -734,6 +753,18 @@ def BUNDLE : Instruction { let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; } +def LIFETIME_START : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); + let AsmString = "LIFETIME_START"; + let neverHasSideEffects = 1; +} +def LIFETIME_END : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); + let AsmString = "LIFETIME_END"; + let neverHasSideEffects = 1; +} // @LOCALMOD-BEGIN def BUNDLE_ALIGN_START : Instruction { let OutOperandList = (outs); diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index da30ab82d6..d7cc1cf45a 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -459,6 +459,13 @@ public: } /// copyPhysReg - Emit instructions to copy a pair of physical registers.
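A minimal sketch of an implementation (hypothetical target and opcode, for illustration only):

    void MyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI, DebugLoc DL,
                                  unsigned DestReg, unsigned SrcReg,
                                  bool KillSrc) const {
      // Single register-to-register move; a real target dispatches on the
      // register classes of DestReg and SrcReg.
      BuildMI(MBB, MI, DL, get(MyTarget::MOVrr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
    }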
+ /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -794,20 +801,6 @@ public: const MachineInstr *UseMI, unsigned UseIdx, bool FindMin = false) const; - /// computeOperandLatency - Compute and return the latency of the given data - /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for - /// an unknown use. If the subtarget allows, this may or may not need to call - /// getOperandLatency(). - /// - /// FindMin may be set to get the minimum vs. expected latency. Minimum - /// latency is used for scheduling groups, while expected latency is for - /// instruction cost and critical path. - unsigned computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, - const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const; - /// getOutputLatency - Compute and return the output dependency latency of /// a given pair of defs which both target the same register. This is usually /// one. diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index a44105bbb2..2a0a43229f 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -18,6 +18,26 @@ namespace llvm { namespace LibFunc { enum Func { + /// void operator delete[](void*); + ZdaPv, + /// void operator delete(void*); + ZdlPv, + /// void *new[](unsigned int); + Znaj, + /// void *new[](unsigned int, nothrow); + ZnajRKSt9nothrow_t, + /// void *new[](unsigned long); + Znam, + /// void *new[](unsigned long, nothrow); + ZnamRKSt9nothrow_t, + /// void *new(unsigned int); + Znwj, + /// void *new(unsigned int, nothrow); + ZnwjRKSt9nothrow_t, + /// void *new(unsigned long); + Znwm, + /// void *new(unsigned long, nothrow); + ZnwmRKSt9nothrow_t, /// int __cxa_atexit(void (*f)(void *), void *p, void *d); cxa_atexit, /// void __cxa_guard_abort(guard_t *guard); @@ -71,6 +91,8 @@ namespace llvm { atanhl, /// long double atanl(long double x); atanl, + /// void *calloc(size_t count, size_t size); + calloc, /// double cbrt(double x); cbrt, /// float cbrtf(float x); @@ -149,6 +171,8 @@ namespace llvm { fputc, /// int fputs(const char *s, FILE *stream); fputs, + /// void free(void *ptr); + free, /// size_t fwrite(const void *ptr, size_t size, size_t nitems, /// FILE *stream); fwrite, @@ -184,6 +208,8 @@ namespace llvm { logf, /// long double logl(long double x); logl, + /// void *malloc(size_t size); + malloc, /// void *memchr(const void *s, int c, size_t n); memchr, /// int memcmp(const void *s1, const void *s2, size_t n); memcmp, @@ -202,6 +228,8 @@ namespace llvm { nearbyintf, /// long double nearbyintl(long double x); nearbyintl, + /// int posix_memalign(void **memptr, size_t alignment, size_t size); + posix_memalign, /// double pow(double x, double y); pow, /// float powf(float x, float y); powf, @@ -212,6 +240,10 @@ namespace llvm { putchar, /// int puts(const char *s); puts, + /// void *realloc(void *ptr, size_t size); + realloc, + /// void *reallocf(void *ptr, size_t size); + reallocf, /// double rint(double x); rint, /// float rintf(float x); rintf, @@ -250,6 +282,8 @@
namespace llvm {
       strchr,
       /// char *strcpy(char *s1, const char *s2);
       strcpy,
+      /// char *strdup(const char *s1);
+      strdup,
       /// size_t strlen(const char *s);
       strlen,
       /// char *strncat(char *s1, const char *s2, size_t n);
@@ -258,6 +292,8 @@ namespace llvm {
       strncmp,
       /// char *strncpy(char *s1, const char *s2, size_t n);
       strncpy,
+      /// char *strndup(const char *s1, size_t n);
+      strndup,
       /// size_t strnlen(const char *s, size_t maxlen);
       strnlen,
       /// double tan(double x);
@@ -278,6 +314,8 @@ namespace llvm {
       truncf,
       /// long double truncl(long double x);
       truncl,
+      /// void *valloc(size_t size);
+      valloc,
 
       NumLibFuncs
     };
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index f928c5f6a8..03f7dcd864 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -25,6 +25,7 @@
 #include "llvm/CallingConv.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Attributes.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -107,6 +108,14 @@ public:
     ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
   };
 
+  enum SelectSupportKind {
+    ScalarValSelect,      // The target supports scalar selects (ex: cmov).
+    ScalarCondVectorVal,  // The target supports selects with a scalar condition
+                          // and vector values (ex: cmov).
+    VectorMaskSelect      // The target supports vector selects with a vector
+                          // mask (ex: x86 blends).
+  };
+
   // @LOCALMOD-START
   // This needs to be kept in sync with
   // native_client/src/untrusted/nacl/pnaclintrin.h.
@@ -152,10 +161,22 @@ public:
   /// this target.
   bool isSelectExpensive() const { return SelectIsExpensive; }
 
+  virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
+
   /// isIntDivCheap() - Return true if integer divide is usually cheaper than
   /// a sequence of several shifts, adds, and multiplies for this target.
   bool isIntDivCheap() const { return IntDivIsCheap; }
 
+  /// isSlowDivBypassed - Returns true if the target has indicated that at
+  /// least one type should be bypassed.
+  bool isSlowDivBypassed() const { return !BypassSlowDivTypes.empty(); }
+
+  /// getBypassSlowDivTypes - Returns the map of slow types for division or
+  /// remainder with the corresponding fast types.
+  const DenseMap<Type *, Type *> &getBypassSlowDivTypes() const {
+    return BypassSlowDivTypes;
+  }
+
   /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
   /// srl/add/sra.
   bool isPow2DivCheap() const { return Pow2DivIsCheap; }
@@ -1057,6 +1078,11 @@ protected:
   /// of instructions not containing an integer divide.
   void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
 
+  /// addBypassSlowDivType - Tells the code generator which types to bypass.
+  void addBypassSlowDivType(Type *slow_type, Type *fast_type) {
+    BypassSlowDivTypes[slow_type] = fast_type;
+  }
+
   /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate
   /// srl/add/sra for a signed divide by power of two, and let the target handle
   /// it.
@@ -1774,6 +1800,12 @@ private:
   /// set to true unconditionally.
   bool IntDivIsCheap;
 
+  /// BypassSlowDivTypes - Tells the code generator to bypass slow divide or
+  /// remainder instructions. For example, BypassSlowDivTypes[i32,u8] tells the
+  /// code generator to bypass 32-bit signed integer div/rem with an 8-bit
+  /// unsigned integer div/rem when the operands are positive and less than 256.
+  DenseMap<Type *, Type *> BypassSlowDivTypes;
+
   /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate
   /// srl/add/sra for a signed divide by power of two, and let the target handle
   /// it.
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 4a38524ad1..1f4b90e3b2 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -87,13 +87,17 @@ namespace TargetOpcode {
   /// BUNDLE - This instruction represents an instruction bundle. Instructions
   /// which immediately follow a BUNDLE instruction which are marked with
   /// 'InsideBundle' flag are inside the bundle.
-  BUNDLE,
+  BUNDLE = 14,
+
+  /// Lifetime markers.
+  LIFETIME_START = 15,
+  LIFETIME_END = 16,
 
   // @LOCALMOD-BEGIN
-  BUNDLE_ALIGN_START = 14,
-  BUNDLE_ALIGN_END = 15,
-  BUNDLE_LOCK = 16,
-  BUNDLE_UNLOCK = 17
+  BUNDLE_ALIGN_START = 17,
+  BUNDLE_ALIGN_END = 18,
+  BUNDLE_LOCK = 19,
+  BUNDLE_UNLOCK = 20,
   // @LOCALMOD-END
 };
 } // end namespace TargetOpcode
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 3f81c06bc0..83bd7874df 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -445,9 +445,9 @@ def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
 def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
                     [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 def atomic_load      : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
-                    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+                    [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def atomic_store     : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
-                    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+                    [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
 // and truncst (see below).
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 8a939cc75e..2510aecc69 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -27,6 +27,7 @@ class AliasAnalysis;
 class Instruction;
 class Pass;
 class ReturnInst;
+class TargetLibraryInfo;
 
 /// DeleteDeadBlock - Delete the specified block, which must have no
 /// predecessors.
@@ -44,7 +45,7 @@ void FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P = 0);
 /// a result. This includes tracing the def-use list from the PHI to see if
 /// it is ultimately unused or if it reaches an unused cycle. Return true
 /// if any PHIs were deleted.
-bool DeleteDeadPHIs(BasicBlock *BB);
+bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = 0);
 
 /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
 /// if possible. The return value indicates success or failure.
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
new file mode 100644
index 0000000000..3646d736ab
--- /dev/null
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -0,0 +1,30 @@
+//===- llvm/Transforms/Utils/BypassSlowDivision.h --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom 32-bit divides are slow enough that during
+// runtime it is profitable to check the value of the operands, and if they are
+// positive and less than 256 use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+#define TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Function.h"
+
+/// This optimization identifies DIV instructions that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool bypassSlowDivision(llvm::Function &F,
+                        llvm::Function::iterator &I,
+                        const llvm::DenseMap<llvm::Type *, llvm::Type *> &BypassTypeMap);
+
+#endif
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 495eab7328..83f0e7a4bd 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -36,6 +36,7 @@ class PHINode;
 class AllocaInst;
 class ConstantExpr;
 class TargetData;
+class TargetLibraryInfo;
 class DIBuilder;
 
 template<typename T> class SmallVectorImpl;
@@ -51,7 +52,8 @@ template<typename T> class SmallVectorImpl;
 /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
 /// conditions and indirectbr addresses this might make dead if
 /// DeleteDeadConditions is true.
-bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false);
+bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false,
+                            const TargetLibraryInfo *TLI = 0);
 
 //===----------------------------------------------------------------------===//
 //  Local dead code elimination.
@@ -60,20 +62,21 @@ bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false);
 /// isInstructionTriviallyDead - Return true if the result produced by the
 /// instruction is not used, and the instruction has no side effects.
 ///
-bool isInstructionTriviallyDead(Instruction *I);
+bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=0);
 
 /// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
 /// trivially dead instruction, delete it. If that makes any of its operands
 /// trivially dead, delete them too, recursively. Return true if any
 /// instructions were deleted.
-bool RecursivelyDeleteTriviallyDeadInstructions(Value *V);
+bool RecursivelyDeleteTriviallyDeadInstructions(Value *V,
+                                                const TargetLibraryInfo *TLI=0);
 
 /// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
 /// dead PHI node, due to being a def-use chain of single-use nodes that
 /// either forms a cycle or is terminated by a trivially dead instruction,
 /// delete it. If that makes any of its operands trivially dead, delete them
 /// too, recursively. Return true if a change was made.
-bool RecursivelyDeleteDeadPHINode(PHINode *PN);
+bool RecursivelyDeleteDeadPHINode(PHINode *PN, const TargetLibraryInfo *TLI=0);
 
 /// SimplifyInstructionsInBlock - Scan the specified basic block and try to
@@ -81,7 +84,8 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN);
 ///
 /// This returns true if it changed the code, note that it can delete
 /// instructions in other blocks as well in this block.
-bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0);
+bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0,
+                                 const TargetLibraryInfo *TLI = 0);
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Graph Restructuring.
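
Taken together, the TargetLowering hooks earlier in this patch and the new BypassSlowDivision.h header form a small two-sided API: a target with a slow wide divider registers a (slow type, fast type) pair, and a late IR pass walks each function, handing that map to bypassSlowDivision(). The following is a minimal sketch of both sides under stated assumptions: the constructor comment and the driver function are illustrative, and only the hook names and the bypassSlowDivision() entry point come from the patch itself.

    #include "llvm/Function.h"
    #include "llvm/Type.h"
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Target/TargetLowering.h"
    #include "llvm/Transforms/Utils/BypassSlowDivision.h"
    using namespace llvm;

    // Target side (e.g. in a TargetLowering constructor for a chip with a
    // slow 32-bit divider): request that i32 div/rem be guarded by an i8
    // fast path when both operands are positive and fit in 8 bits.
    //   addBypassSlowDivType(Type::getInt32Ty(Ctx), Type::getInt8Ty(Ctx));

    // Pass side: a driver loop modeled on what a codegen-preparation pass
    // would do with these hooks.
    static bool runDivBypass(Function &F, const TargetLowering &TLI) {
      bool MadeChange = false;
      if (TLI.isSlowDivBypassed()) {
        const DenseMap<Type *, Type *> &BypassTypeMap =
            TLI.getBypassSlowDivTypes();
        // bypassSlowDivision may split the current block, which is why the
        // iterator is passed by reference and F.end() is re-evaluated.
        for (Function::iterator I = F.begin(); I != F.end(); ++I)
          MadeChange |= bypassSlowDivision(F, I, BypassTypeMap);
      }
      return MadeChange;
    }
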
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 3b6aab13a5..f768eeca41 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -36,6 +36,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; // Register the AliasAnalysis interface, providing a nice name to refer to. @@ -452,6 +453,7 @@ AliasAnalysis::~AliasAnalysis() {} /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { TD = P->getAnalysisIfAvailable<TargetData>(); + TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>(); AA = &P->getAnalysis<AliasAnalysis>(); } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 92e89068e4..e9dcb37903 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -550,7 +550,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { //===----------------------------------------------------------------------===// void AliasSet::print(raw_ostream &OS) const { - OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; + OS << " AliasSet[" << (const void*)this << ", " << RefCount << "] "; OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; @@ -590,8 +590,10 @@ void AliasSetTracker::print(raw_ostream &OS) const { OS << "\n"; } +#ifndef NDEBUG void AliasSet::dump() const { print(dbgs()); } void AliasSetTracker::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // ASTCallbackVH Class Implementation diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 0ba6af93b5..87a75fd3b1 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -61,6 +61,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePathProfileLoaderPassPass(Registry); initializeProfileVerifierPassPass(Registry); initializePathProfileVerifierPass(Registry); + initializeProfileMetadataLoaderPassPass(Registry); initializeRegionInfoPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 1d028c27b8..a3bc06a80f 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -85,9 +85,10 @@ static bool isEscapeSource(const Value *V) { /// getObjectSize - Return the size of the object specified by V, or /// UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const TargetData &TD, + const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, RoundToAlign)) + if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -95,10 +96,11 @@ static uint64_t getObjectSize(const Value *V, const TargetData &TD, /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, uint64_t Size, - const TargetData &TD) { + const TargetData &TD, + const TargetLibraryInfo &TLI) { // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. 
- uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true); + uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -106,8 +108,8 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, /// isObjectSize - Return true if we can prove that the object specified /// by V has size Size. static bool isObjectSize(const Value *V, uint64_t Size, - const TargetData &TD) { - uint64_t ObjectSize = getObjectSize(V, TD); + const TargetData &TD, const TargetLibraryInfo &TLI) { + uint64_t ObjectSize = getObjectSize(V, TD, TLI); return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; } @@ -126,6 +128,15 @@ namespace { const Value *V; ExtensionKind Extension; int64_t Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && Extension == Other.Extension && + Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } }; } @@ -417,13 +428,7 @@ namespace { /// BasicAliasAnalysis - This is the primary alias analysis implementation. struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : ImmutablePass(ID), - // AliasCache rarely has more than 1 or 2 elements, - // so start it off fairly small so that clear() - // doesn't have to tromp through 64 (the default) - // elements on each alias query. This really wants - // something like a SmallDenseMap. - AliasCache(8) { + BasicAliasAnalysis() : ImmutablePass(ID) { initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); } @@ -443,7 +448,11 @@ namespace { "BasicAliasAnalysis doesn't support interprocedural queries."); AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, LocB.Ptr, LocB.Size, LocB.TBAATag); - AliasCache.clear(); + // AliasCache rarely has more than 1 or 2 elements, always use + // shrink_and_clear so it quickly returns to the inline capacity of the + // SmallDenseMap if it ever grows larger. + // FIXME: This should really be shrink_to_inline_capacity_and_clear(). + AliasCache.shrink_and_clear(); return Alias; } @@ -481,7 +490,7 @@ namespace { private: // AliasCache - Track alias queries to guard against recursion. typedef std::pair<Location, Location> LocPair; - typedef DenseMap<LocPair, AliasResult> AliasCacheTy; + typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; AliasCacheTy AliasCache; // Visited - Track instructions visited by pointsToConstantMemory. @@ -490,6 +499,7 @@ namespace { // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2); @@ -807,6 +817,21 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } +static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1, + SmallVector<VariableGEPIndex, 4> &Indices2) { + unsigned Size1 = Indices1.size(); + unsigned Size2 = Indices2.size(); + + if (Size1 != Size2) + return false; + + for (unsigned I = 0; I != Size1; ++I) + if (Indices1[I] != Indices2[I]) + return false; + + return true; +} + /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. 
We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), @@ -814,6 +839,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, /// AliasAnalysis::AliasResult BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const MDNode *V1TBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, @@ -821,9 +847,41 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, int64_t GEP1BaseOffset; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; - // If we have two gep instructions with must-alias'ing base pointers, figure - // out if the indexes to the GEP tell us anything about the derived pointer. + // If we have two gep instructions with must-alias or not-alias'ing base + // pointers, figure out if the indexes to the GEP tell us anything about the + // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Check for geps of non-aliasing underlying pointers where the offsets are + // identical. + if (V1Size == V2Size) { + // Do the base pointers alias assuming type and size. + AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, + V1TBAAInfo, UnderlyingV2, + V2Size, V2TBAAInfo); + if (PreciseBaseAlias == NoAlias) { + // See if the computed offset from the common pointer tells us about the + // relation of the resulting pointer. + int64_t GEP2BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + // Same offsets. + if (GEP1BaseOffset == GEP2BaseOffset && + areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + return NoAlias; + GEP1VariableIndices.clear(); + } + } + // Do the base pointers alias? AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, UnderlyingV2, UnknownSize, 0); @@ -843,9 +901,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); - // If DecomposeGEPExpression isn't able to look all the way through the - // addressing operation, we must not have TD and this is too complex for us - // to handle without it. + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { assert(TD == 0 && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); @@ -879,9 +936,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); - // If DecomposeGEPExpression isn't able to look all the way through the - // addressing operation, we must not have TD and this is too complex for us - // to handle without it. + // DecomposeGEPExpression and GetUnderlyingObject should return the + // same result except when DecomposeGEPExpression has no TargetData. 
 if (GEP1BasePtr != UnderlyingV1) {
     assert(TD == 0 &&
            "DecomposeGEPExpression and GetUnderlyingObject disagree!");
@@ -1004,12 +1060,42 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   // on corresponding edges.
   if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
     if (PN2->getParent() == PN->getParent()) {
+      LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
+                   Location(V2, V2Size, V2TBAAInfo));
+      if (PN > V2)
+        std::swap(Locs.first, Locs.second);
+
       AliasResult Alias = aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo,
                                      PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
                                      V2Size, V2TBAAInfo);
       if (Alias == MayAlias)
         return MayAlias;
+
+      // If the first sources of the PHI nodes NoAlias and the other inputs
+      // reach the PHI nodes themselves through some amount of recursion, this
+      // does not add any new information, so just return NoAlias.
+      // bb:
+      //   ptr = ptr2 + 1
+      // loop:
+      //   ptr_phi = phi [bb, ptr], [loop, ptr_plus_one]
+      //   ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one]
+      //   ...
+      //   ptr_plus_one = gep ptr_phi, 1
+      //   ptr2_plus_one = gep ptr2_phi, 1
+      // We assume for the recursion that the phis (ptr_phi, ptr2_phi) do
+      // not alias each other.
+      bool ArePhisAssumedNoAlias = false;
+      AliasResult OrigAliasResult;
+      if (Alias == NoAlias) {
+        // Pretend the phis do not alias.
+        assert(AliasCache.count(Locs) &&
+               "There must exist an entry for the phi node");
+        OrigAliasResult = AliasCache[Locs];
+        AliasCache[Locs] = NoAlias;
+        ArePhisAssumedNoAlias = true;
+      }
+
       for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
         AliasResult ThisAlias =
           aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
@@ -1019,6 +1105,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
         if (Alias == MayAlias)
           break;
       }
+
+      // Reset if speculation failed.
+      if (ArePhisAssumedNoAlias && Alias != NoAlias)
+        AliasCache[Locs] = OrigAliasResult;
+
       return Alias;
     }
 
@@ -1133,8 +1224,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
   if (TD)
-    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
-        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
+        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
       return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
@@ -1156,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(O1, O2);
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
-    AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+    AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
     if (Result != MayAlias) return AliasCache[Locs] = Result;
   }
@@ -1184,8 +1275,8 @@
   // accesses is accessing the entire object, then the accesses must
   // overlap in some way.
if (TD && O1 == O2) - if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || - (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI))) return AliasCache[Locs] = PartialAlias; AliasResult Result = diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index b255ce6dba..04a6560262 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -115,14 +115,14 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { return false; } - SmallPtrSet<BasicBlock *, 4> UnreachableEdges; - SmallPtrSet<BasicBlock *, 4> ReachableEdges; + SmallVector<unsigned, 4> UnreachableEdges; + SmallVector<unsigned, 4> ReachableEdges; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { if (PostDominatedByUnreachable.count(*I)) - UnreachableEdges.insert(*I); + UnreachableEdges.push_back(I.getSuccessorIndex()); else - ReachableEdges.insert(*I); + ReachableEdges.push_back(I.getSuccessorIndex()); } // If all successors are in the set of blocks post-dominated by unreachable, @@ -136,18 +136,19 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { return false; uint32_t UnreachableWeight = - std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT); - for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); + std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); + for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, UnreachableWeight); if (ReachableEdges.empty()) return true; uint32_t ReachableWeight = - std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT); - for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); + std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), + NORMAL_WEIGHT); + for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, ReachableWeight); @@ -187,7 +188,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + setEdgeWeight(BB, i, Weights[i]); return true; } @@ -211,19 +212,17 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { assert(CI->getOperand(1)->getType()->isPointerTy()); - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); - // p != 0 -> isProb = true // p == 0 -> isProb = false // p != q -> isProb = true // p == q -> isProb = false; + unsigned TakenIdx = 0, NonTakenIdx = 1; bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); return true; } @@ -234,17 +233,17 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (!L) return false; - SmallPtrSet<BasicBlock *, 8> BackEdges; - SmallPtrSet<BasicBlock *, 8> ExitingEdges; - SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from 
header to the loop. + SmallVector<unsigned, 8> BackEdges; + SmallVector<unsigned, 8> ExitingEdges; + SmallVector<unsigned, 8> InEdges; // Edges from header to the loop. for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { if (!L->contains(*I)) - ExitingEdges.insert(*I); + ExitingEdges.push_back(I.getSuccessorIndex()); else if (L->getHeader() == *I) - BackEdges.insert(*I); + BackEdges.push_back(I.getSuccessorIndex()); else - InEdges.insert(*I); + InEdges.push_back(I.getSuccessorIndex()); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -252,10 +251,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, *EI, backWeight); } } @@ -264,10 +262,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (inWeight < NORMAL_WEIGHT) inWeight = NORMAL_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { - BasicBlock *Back = *EI; - setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, *EI, inWeight); } } @@ -276,10 +273,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { - BasicBlock *Exiting = *EI; - setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, *EI, exitWeight); } } @@ -335,14 +331,13 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); return true; } @@ -372,14 +367,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { return false; } - BasicBlock *Taken = BI->getSuccessor(0); - BasicBlock *NonTaken = BI->getSuccessor(1); + unsigned TakenIdx = 0, NonTakenIdx = 1; if (!isProb) - std::swap(Taken, NonTaken); + std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); return true; } @@ -389,11 +383,8 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - BasicBlock *Normal = II->getNormalDest(); - BasicBlock *Unwind = II->getUnwindDest(); - - setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); return true; } @@ -450,8 +441,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { 
-    const BasicBlock *Succ = *I;
-    uint32_t Weight = getEdgeWeight(BB, Succ);
+    uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex());
     uint32_t PrevSum = Sum;
 
     Sum += Weight;
@@ -494,11 +484,13 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
   return 0;
 }
 
-// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value.
+/// Get the raw edge weight for the edge. If it cannot be found, return the
+/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an
+/// index to the successors.
 uint32_t BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
-  Edge E(Src, Dst);
-  DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E);
+getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
+  DenseMap<Edge, uint32_t>::const_iterator I =
+      Weights.find(std::make_pair(Src, IndexInSuccessors));
 
   if (I != Weights.end())
     return I->second;
@@ -506,15 +498,43 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
   return DEFAULT_WEIGHT;
 }
 
+/// Get the raw edge weight calculated for the block pair. This returns the sum
+/// of all raw edge weights from Src to Dst.
+uint32_t BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
+  uint32_t Weight = 0;
+  DenseMap<Edge, uint32_t>::const_iterator MapI;
+  for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
+    if (*I == Dst) {
+      MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
+      if (MapI != Weights.end())
+        Weight += MapI->second;
+    }
+  return (Weight == 0) ? DEFAULT_WEIGHT : Weight;
+}
+
+/// Set the edge weight for a given edge specified by PredBlock and an index
+/// to the successors.
 void BranchProbabilityInfo::
-setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) {
-  Weights[std::make_pair(Src, Dst)] = Weight;
+setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors,
+              uint32_t Weight) {
+  Weights[std::make_pair(Src, IndexInSuccessors)] = Weight;
   DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
-               << Dst->getName() << " weight to " << Weight
-               << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
+               << IndexInSuccessors << " successor weight to "
+               << Weight << "\n");
 }
 
+/// Get an edge's probability, relative to other out-edges from Src.
+BranchProbability BranchProbabilityInfo::
+getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const {
+  uint32_t N = getEdgeWeight(Src, IndexInSuccessors);
+  uint32_t D = getSumForBlock(Src);
+
+  return BranchProbability(N, D);
+}
+/// Get the probability of going from Src to Dst. It returns the sum of all
+/// probabilities for edges from Src to Dst.
BranchProbability BranchProbabilityInfo:: getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 96e68b4199..e461848e86 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -44,6 +44,8 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp + ProfileDataLoader.cpp + ProfileDataLoaderPass.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index f5e619c673..4ad613c66a 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -659,7 +659,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), - makeArrayRef((Value **)Ops.data() + 1, + makeArrayRef((Value *const*) + Ops.data() + 1, Ops.size() - 1))); Ptr = StripPtrCastKeepAS(Ptr); diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 1604576ec4..5536a9b705 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -133,7 +133,9 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { } } +#ifndef NDEBUG void DominanceFrontierBase::dump() const { print(dbgs()); } +#endif diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 0df3e8a382..947ad519c6 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -198,9 +198,11 @@ void CallGraph::print(raw_ostream &OS, Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } +#ifndef NDEBUG void CallGraph::dump() const { print(dbgs(), 0); } +#endif //===----------------------------------------------------------------------===// // Implementations of public modification methods @@ -267,7 +269,9 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } +#ifndef NDEBUG void CallGraphNode::dump() const { print(dbgs()); } +#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 22f6e96b53..990caa80c8 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -263,7 +263,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) return true; - } else if (isFreeCall(U)) { + } else if (isFreeCall(U, TLI)) { Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); } else if (CallInst *CI = dyn_cast<CallInst>(U)) { // Make sure that this is just the function being called, not that it is @@ -329,7 +329,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); - if (!isAllocLikeFn(Ptr)) + if (!isAllocLikeFn(Ptr, TLI)) return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a @@ -458,7 +458,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (SI->isVolatile()) // Treat volatile stores as reading memory somewhere. 
FunctionEffect |= Ref; - } else if (isAllocationFn(&*II) || isFreeCall(&*II)) { + } else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) { FunctionEffect |= ModRef; } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) { // The callgraph doesn't include intrinsic calls. diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 0a6682a254..f70518165a 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -273,9 +273,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } } +#ifndef NDEBUG void IVUsers::dump() const { print(dbgs()); } +#endif void IVUsers::releaseMemory() { Processed.clear(); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index bc1ecd2ea4..12be7fdc14 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -974,6 +974,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { return AlwaysInline || Cost < Threshold; } +#ifndef NDEBUG /// \brief Dump stats about this call's analysis. void CallAnalyzer::dump() { #define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" @@ -987,6 +988,7 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(SROACostSavingsLost); #undef DEBUG_PRINT_STAT } +#endif InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9140786a1b..ec618fad22 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -470,8 +470,10 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { return true; LVIValueHandle ValHandle(Val, this); - if (!ValueCache.count(ValHandle)) return false; - return ValueCache[ValHandle].count(BB); + std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = + ValueCache.find(ValHandle); + if (I == ValueCache.end()) return false; + return I->second.count(BB); } LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -845,9 +847,12 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { ConstantRange EdgeVal(i.getCaseValue()->getValue()); - if (DefaultCase) - EdgesVals = EdgesVals.difference(EdgeVal); - else if (i.getCaseSuccessor() == BBTo) + if (DefaultCase) { + // It is possible that the default destination is the destination of + // some cases. There is no need to perform difference for those cases. 
+ if (i.getCaseSuccessor() != BBTo) + EdgesVals = EdgesVals.difference(EdgeVal); + } else if (i.getCaseSuccessor() == BBTo) EdgesVals = EdgesVals.unionWith(EdgeVal); } Result = LVILatticeVal::getRange(EdgesVals); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 20c33a3d9d..4a18104776 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -306,9 +306,11 @@ BasicBlock *Loop::getUniqueExitBlock() const { return 0; } +#ifndef NDEBUG void Loop::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // UnloopUpdater implementation @@ -429,8 +431,8 @@ void UnloopUpdater::updateSubloopParents() { Unloop->removeChildLoop(llvm::prior(Unloop->end())); assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); - if (SubloopParents[Subloop]) - SubloopParents[Subloop]->addChildLoop(Subloop); + if (Loop *Parent = SubloopParents[Subloop]) + Parent->addChildLoop(Subloop); else LI->addTopLevelLoop(Subloop); } @@ -456,9 +458,8 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { assert(Subloop && "subloop is not an ancestor of the original loop"); } // Get the current nearest parent of the Subloop exits, initially Unloop. - if (!SubloopParents.count(Subloop)) - SubloopParents[Subloop] = Unloop; - NearLoop = SubloopParents[Subloop]; + NearLoop = + SubloopParents.insert(std::make_pair(Subloop, Unloop)).first->second; } succ_iterator I = succ_begin(BB), E = succ_end(BB); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index b986b3258d..5b2313e3a6 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,7 +40,7 @@ enum AllocType { }; struct AllocFnsTy { - const char *Name; + LibFunc::Func Func; AllocType AllocTy; unsigned char NumParams; // First and Second size parameters (or -1 if unused) @@ -49,22 +50,22 @@ struct AllocFnsTy { // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. 
static const AllocFnsTy AllocationFnData[] = { - {"malloc", MallocLike, 1, 0, -1}, - {"valloc", MallocLike, 1, 0, -1}, - {"_Znwj", MallocLike, 1, 0, -1}, // new(unsigned int) - {"_ZnwjRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) - {"_Znwm", MallocLike, 1, 0, -1}, // new(unsigned long) - {"_ZnwmRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) - {"_Znaj", MallocLike, 1, 0, -1}, // new[](unsigned int) - {"_ZnajRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) - {"_Znam", MallocLike, 1, 0, -1}, // new[](unsigned long) - {"_ZnamRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) - {"posix_memalign", MallocLike, 3, 2, -1}, - {"calloc", CallocLike, 2, 0, 1}, - {"realloc", ReallocLike, 2, 1, -1}, - {"reallocf", ReallocLike, 2, 1, -1}, - {"strdup", StrDupLike, 1, -1, -1}, - {"strndup", StrDupLike, 2, 1, -1} + {LibFunc::malloc, MallocLike, 1, 0, -1}, + {LibFunc::valloc, MallocLike, 1, 0, -1}, + {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::posix_memalign, MallocLike, 3, 2, -1}, + {LibFunc::calloc, CallocLike, 2, 0, 1}, + {LibFunc::realloc, ReallocLike, 2, 1, -1}, + {LibFunc::reallocf, ReallocLike, 2, 1, -1}, + {LibFunc::strdup, StrDupLike, 1, -1, -1}, + {LibFunc::strndup, StrDupLike, 2, 1, -1} }; @@ -85,15 +86,22 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { /// \brief Returns the allocation data for the given value if it is a call to a /// known allocation function, and NULL otherwise. static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; + // Make sure that the function is available. + StringRef FnName = Callee->getName(); + LibFunc::Func TLIFn; + if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) + return 0; + unsigned i = 0; bool found = false; for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (Callee->getName() == AllocationFnData[i].Name) { + if (AllocationFnData[i].Func == TLIFn) { found = true; break; } @@ -106,7 +114,6 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, return 0; // Check function prototype. - // FIXME: Check the nobuiltin metadata?? (PR5130) int FstParam = FnData->FstParam; int SndParam = FnData->SndParam; FunctionType *FTy = Callee->getFunctionType(); @@ -132,57 +139,65 @@ static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). 
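
Note how the table above now keys each entry to a LibFunc enum value rather than a raw string, so recognition only fires when TargetLibraryInfo confirms the routine. Pulled out of getAllocationData() for illustration, the availability test amounts to the following sketch; the helper name is invented here, while the two TLI calls are exactly the ones visible in this hunk.

    #include "llvm/Function.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    // A callee is only treated as a known library routine when TLI both
    // recognizes its name and reports it available on the current target
    // (TargetLibraryInfo is how settings like -fno-builtin are honored).
    static bool isRecognizedLibCall(const Function *Callee,
                                    const TargetLibraryInfo *TLI) {
      if (!Callee || !TLI)
        return false;
      LibFunc::Func TLIFn;
      // getLibFunc maps "malloc", "_Znwm", ... onto the LibFunc enum;
      // has() answers whether this target actually provides the routine.
      return TLI->getLibFunc(Callee->getName(), TLIFn) && TLI->has(TLIFn);
    }
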
-bool llvm::isAllocationFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AnyAlloc, LookThroughBitCast); +bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). -bool llvm::isNoAliasFn(const Value *V, bool LookThroughBitCast) { +bool llvm::isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { // it's safe to consider realloc as noalias since accessing the original // pointer is undefined behavior - return isAllocationFn(V, LookThroughBitCast) || + return isAllocationFn(V, TLI, LookThroughBitCast) || hasNoAliasAttr(V, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates uninitialized memory (such as malloc). -bool llvm::isMallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, MallocLike, LookThroughBitCast); +bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). -bool llvm::isCallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, CallocLike, LookThroughBitCast); +bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). -bool llvm::isAllocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, AllocLike, LookThroughBitCast); +bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } /// \brief Tests if a value is a call or invoke to a library function that /// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, LookThroughBitCast); +bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); } /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I) { - return isMallocLikeFn(I) ? dyn_cast<CallInst>(I) : 0; +const CallInst *llvm::extractMallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; } static Value *computeArraySize(const CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) return NULL; // The size of the malloc's result type must be known to determine array size. 
- Type *T = getMallocAllocatedType(CI); + Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) return NULL; @@ -204,9 +219,11 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, /// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. -const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { - const CallInst *CI = extractMallocCall(I); - Value *ArraySize = computeArraySize(CI, TD); +const CallInst *llvm::isArrayMalloc(const Value *I, + const TargetData *TD, + const TargetLibraryInfo *TLI) { + const CallInst *CI = extractMallocCall(I, TLI); + Value *ArraySize = computeArraySize(CI, TD, TLI); if (ArraySize && ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) @@ -221,8 +238,9 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 0: PointerType is the calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -PointerType *llvm::getMallocType(const CallInst *CI) { - assert(isMallocLikeFn(CI) && "getMallocType and not malloc call"); +PointerType *llvm::getMallocType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; @@ -252,8 +270,9 @@ PointerType *llvm::getMallocType(const CallInst *CI) { /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -Type *llvm::getMallocAllocatedType(const CallInst *CI) { - PointerType *PT = getMallocType(CI); +Type *llvm::getMallocAllocatedType(const CallInst *CI, + const TargetLibraryInfo *TLI) { + PointerType *PT = getMallocType(CI, TLI); return PT ? PT->getElementType() : NULL; } @@ -263,21 +282,23 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI) { /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, bool LookThroughSExt) { - assert(isMallocLikeFn(CI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, TD, LookThroughSExt); + assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, TD, TLI, LookThroughSExt); } /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. -const CallInst *llvm::extractCallocCall(const Value *I) { - return isCallocLikeFn(I) ? cast<CallInst>(I) : 0; +const CallInst *llvm::extractCallocCall(const Value *I, + const TargetLibraryInfo *TLI) { + return isCallocLikeFn(I, TLI) ? 
cast<CallInst>(I) : 0;
 }
 
 /// isFreeCall - Returns non-null if the value is a call to the builtin free()
-const CallInst *llvm::isFreeCall(const Value *I) {
+const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
   const CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI)
     return 0;
@@ -285,9 +306,14 @@ const CallInst *llvm::isFreeCall(const Value *I) {
   if (Callee == 0 || !Callee->isDeclaration())
     return 0;
 
-  if (Callee->getName() != "free" &&
-      Callee->getName() != "_ZdlPv" && // operator delete(void*)
-      Callee->getName() != "_ZdaPv")   // operator delete[](void*)
+  StringRef FnName = Callee->getName();
+  LibFunc::Func TLIFn;
+  if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
+    return 0;
+
+  if (TLIFn != LibFunc::free &&
+      TLIFn != LibFunc::ZdlPv &&   // operator delete(void*)
+      TLIFn != LibFunc::ZdaPv)     // operator delete[](void*)
     return 0;
 
   // Check free prototype.
@@ -316,11 +342,11 @@ const CallInst *llvm::isFreeCall(const Value *I) {
 /// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
 /// byval arguments, and global variables.
 bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD,
-                         bool RoundToAlign) {
+                         const TargetLibraryInfo *TLI, bool RoundToAlign) {
   if (!TD)
     return false;
 
-  ObjectSizeOffsetVisitor Visitor(TD, Ptr->getContext(), RoundToAlign);
+  ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
   SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
   if (!Visitor.bothKnown(Data))
     return false;
@@ -348,9 +374,10 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
 }
 
 ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD,
+                                                 const TargetLibraryInfo *TLI,
                                                  LLVMContext &Context,
                                                  bool RoundToAlign)
-: TD(TD), RoundToAlign(RoundToAlign) {
+: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
   IntegerType *IntTy = TD->getIntPtrType(Context);
   IntTyBits = IntTy->getBitWidth();
   Zero = APInt::getNullValue(IntTyBits);
@@ -416,7 +443,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
-  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc);
+  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+                                               TLI);
   if (!FnData)
     return unknown();
@@ -532,8 +560,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
 
 ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD,
+                                                     const TargetLibraryInfo *TLI,
                                                      LLVMContext &Context)
-: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)) {
+: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
   IntTy = TD->getIntPtrType(Context);
   Zero = ConstantInt::get(IntTy, 0);
 }
@@ -558,7 +587,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
 }
 
 SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
-  ObjectSizeOffsetVisitor Visitor(TD, Context);
+  ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
   SizeOffsetType Const = Visitor.compute(V);
   if (Visitor.bothKnown(Const))
     return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -621,7 +650,8 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
 }
 
 SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
-  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc);
+  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
+                                               TLI);
   if (!FnData)
     return unknown();
 
diff
--git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 059e57493b..5736c3569d 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -148,7 +148,7 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, return AliasAnalysis::ModRef; } - if (const CallInst *CI = isFreeCall(Inst)) { + if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { // calls to free() deallocate the entire structure Loc = AliasAnalysis::Location(CI->getArgOperand(0)); return AliasAnalysis::Mod; @@ -479,12 +479,20 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst)) { + const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); + if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); - continue; + // Be conservative if the accessed pointer may alias the allocation. + if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias) + return MemDepResult::getClobber(Inst); + // If the allocation is not aliased and does not read memory (like + // strdup), it is safe to ignore. + if (isa<AllocaInst>(Inst) || + isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI)) + continue; } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 38cb1c91f8..d6a17ca725 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -41,6 +41,7 @@ static bool CanPHITrans(Instruction *Inst) { return false; } +#ifndef NDEBUG void PHITransAddr::dump() const { if (Addr == 0) { dbgs() << "PHITransAddr: null\n"; @@ -50,6 +51,7 @@ void PHITransAddr::dump() const { for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; } +#endif static bool VerifySubExpr(Value *Expr, diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp new file mode 100644 index 0000000000..69286efb3c --- /dev/null +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -0,0 +1,162 @@ +//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load raw profiling data from the dump +// file. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Analysis/ProfileDataLoader.h"
+#include "llvm/Analysis/ProfileDataTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
+                              const BasicBlock *> E) {
+  O << "(";
+
+  if (E.first)
+    O << E.first->getName();
+  else
+    O << "0";
+
+  O << ",";
+
+  if (E.second)
+    O << E.second->getName();
+  else
+    O << "0";
+
+  return O << ")";
+}
+
+/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
+/// (or both) may not be defined.
+static unsigned AddCounts(unsigned A, unsigned B) {
+  // If either value is undefined, use the other.
+  // Undefined + undefined = undefined.
+  if (A == ProfileDataLoader::Uncounted) return B;
+  if (B == ProfileDataLoader::Uncounted) return A;
+
+  // Saturate to the maximum storable value. This could change taken/nottaken
+  // ratios, but is presumably better than wrapping and thus potentially
+  // inverting ratios.
+  uint64_t tmp = (uint64_t)A + (uint64_t)B;
+  if (tmp > (uint64_t)ProfileDataLoader::MaxCount)
+    tmp = ProfileDataLoader::MaxCount;
+  return (unsigned)tmp;
+}
+
+/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
+template <typename T>
+static void ReadProfilingData(const char *ToolName, FILE *F,
+                              T *Data, size_t NumEntries) {
+  // Read in the block of data...
+  if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
+    report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
+}
+
+/// ReadProfilingNumEntries - Read how many entries are in this profiling data
+/// packet.
+static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
+                                        bool ShouldByteSwap) {
+  unsigned Entry;
+  ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
+  return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
+}
+
+/// ReadProfilingBlock - Read the number of entries in the next profiling data
+/// packet and then accumulate the entries into 'Data'.
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+                               bool ShouldByteSwap,
+                               SmallVector<unsigned, 32> &Data) {
+  // Read the number of entries...
+  unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+  // Read in the data.
+  SmallVector<unsigned, 8> TempSpace(NumEntries);
+  ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
+
+  // Make sure we have enough space ...
+  if (Data.size() < NumEntries)
+    Data.resize(NumEntries, ProfileDataLoader::Uncounted);
+
+  // Accumulate the data we just read into the existing data.
+  for (unsigned i = 0; i < NumEntries; ++i) {
+    unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
+    Data[i] = AddCounts(Entry, Data[i]);
+  }
+}
+
+/// ReadProfilingArgBlock - Read the command line arguments that the program was
+/// run with when the current profiling data packet(s) were generated.
+static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
+                                  bool ShouldByteSwap,
+                                  SmallVector<std::string, 1> &CommandLines) {
+  // Read the number of bytes ...
+  unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+  // Read in the arguments (if there are any to read). Round up the length to
+  // the nearest 4-byte multiple.
+ SmallVector<char, 8> Args(ArgLength+4); + if (ArgLength) + ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3); + + // Store the arguments. + CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); +} + +const unsigned ProfileDataLoader::Uncounted = ~0U; +const unsigned ProfileDataLoader::MaxCount = ~0U - 1U; + +/// ProfileDataLoader ctor - Read the specified profiling data file, reporting +/// a fatal error if the file is invalid or broken. +ProfileDataLoader::ProfileDataLoader(const char *ToolName, + const std::string &Filename) + : Filename(Filename) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) + report_fatal_error(Twine(ToolName) + ": Error opening '" + + Filename + "': "); + + // Keep reading packets until we run out of them. + unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. This can happen when the compiler host and target have + // different endianness. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType; + + switch (PacketType) { + case ArgumentInfo: + ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + default: + report_fatal_error(std::string(ToolName) + + ": Unknown profiling packet type"); + break; + } + } + + fclose(F); +} diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp new file mode 100644 index 0000000000..c43cff05a4 --- /dev/null +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -0,0 +1,188 @@ +//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass loads profiling data from a dump file and sets branch weight +// metadata. +// +// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" +// once ProfileInfo etc. has been removed. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-metadata-loader" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileDataLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); +STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); + +static cl::opt<std::string> +ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-metadata-loader")); + +namespace { + /// This pass loads profiling data from a dump file and sets branch weight + /// metadata. 
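The pass class itself follows below. One detail of the constructor above deserves a worked illustration: packet-type codes are small, so the low-order byte of a correctly-ordered word is never zero; reading a zero low byte therefore means the file came from an opposite-endian host and every word must be swapped. A self-contained sketch of that check, where byteSwap32 stands in for LLVM's ByteSwap_32:

#include <cassert>
#include <stdint.h>

// Stand-in for ByteSwap_32 from llvm/Support/MathExtras.h.
static uint32_t byteSwap32(uint32_t V) {
  return (V << 24) | ((V & 0xFF00u) << 8) | ((V >> 8) & 0xFF00u) | (V >> 24);
}

int main() {
  // A packet-type word of 2 as written by an opposite-endian host: the
  // value's low byte reads as zero, which same-endian data never produces.
  uint32_t PacketType = 0x02000000u;
  bool ShouldByteSwap = (char)PacketType == 0; // low byte of the value
  if (ShouldByteSwap)
    PacketType = byteSwap32(PacketType);
  assert(ShouldByteSwap && PacketType == 2);
  return 0;
}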
+  class ProfileMetadataLoaderPass : public ModulePass {
+    std::string Filename;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit ProfileMetadataLoaderPass(const std::string &filename = "")
+        : ModulePass(ID), Filename(filename) {
+      initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
+      if (filename.empty()) Filename = ProfileMetadataFilename;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profile loader";
+    }
+
+    virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
+                          ArrayRef<unsigned>);
+    virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
+    virtual void setBranchWeightMetadata(Module&, ProfileData&);
+
+    virtual bool runOnModule(Module &M);
+  };
+} // End of anonymous namespace
+
+char ProfileMetadataLoaderPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
+              "Load profile information from llvmprof.out", false, true)
+INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
+              "Load profile information from llvmprof.out", false, true)
+
+char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
+
+/// createProfileMetadataLoaderPass - This function returns a Pass that loads
+/// the profiling information for the module from the specified filename,
+/// making it available to the optimizers.
+ModulePass *llvm::createProfileMetadataLoaderPass() {
+  return new ProfileMetadataLoaderPass();
+}
+ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
+  return new ProfileMetadataLoaderPass(Filename);
+}
+
+/// readEdge - Take the value from a profile counter and assign it to an edge.
+void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
+                                         ProfileData &PB, ProfileData::Edge e,
+                                         ArrayRef<unsigned> Counters) {
+  if (ReadCount >= Counters.size()) return;
+
+  unsigned weight = Counters[ReadCount];
+  assert(weight != ProfileDataLoader::Uncounted);
+  PB.addEdgeWeight(e, weight);
+
+  DEBUG(dbgs() << "-- Read Edge Counter for " << e
+               << " (# "<< (ReadCount) << "): "
+               << PB.getEdgeWeight(e) << "\n");
+}
+
+/// matchEdges - Link every profile counter with an edge.
+unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
+                                               ArrayRef<unsigned> Counters) {
+  if (Counters.size() == 0) return 0;
+
+  unsigned ReadCount = 0;
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
+    readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+        readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
+                 Counters);
+      }
+    }
+  }
+
+  return ReadCount;
+}
+
+/// setBranchWeightMetadata - Translate the counter values associated with each
+/// edge into branch weights for each conditional branch (a branch with 2 or
+/// more destinations).
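The setBranchWeightMetadata() implementation follows below. First, a sketch of how the two factory functions above get used from C++; it assumes the declarations live in llvm/Analysis/Passes.h, which the #include list above suggests but the excerpt does not show:

#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h" // assumed home of the declarations

using namespace llvm;

// Roughly what `opt -profile-metadata-loader -profile-file=llvmprof.out`
// arranges from the command line, done programmatically.
static void addProfileLoader(PassManager &PM) {
  PM.add(createProfileMetadataLoaderPass("llvmprof.out"));
}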
+void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
+                                                        ProfileData &PB) {
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+      unsigned NumSuccessors = TI->getNumSuccessors();
+
+      // If there is only one successor then we cannot set a branch
+      // probability as the target is certain.
+      if (NumSuccessors < 2) continue;
+
+      // Load the weights of all edges leading from this terminator.
+      DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
+                   << " successors:\n");
+      SmallVector<uint32_t, 4> Weights(NumSuccessors);
+      for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
+        ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
+        Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
+        DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
+                     << Weights[s] << "\n");
+      }
+
+      // Set branch weight metadata. This will set branch probabilities of
+      // 100%/0% if that is true of the dynamic execution.
+      // BranchProbabilityInfo can account for this when it loads this metadata
+      // (it gives the unexecuted branch a weight of 1 for the purposes of
+      // probability calculations).
+      MDBuilder MDB(TI->getContext());
+      MDNode *Node = MDB.createBranchWeights(Weights);
+      TI->setMetadata(LLVMContext::MD_prof, Node);
+      NumTermsAnnotated++;
+    }
+  }
+}
+
+bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
+  ProfileDataLoader PDL("profile-data-loader", Filename);
+  ProfileData PB;
+
+  ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
+
+  unsigned ReadCount = matchEdges(M, PB, Counters);
+
+  if (ReadCount != Counters.size()) {
+    errs() << "WARNING: profile information is inconsistent with "
+           << "the current program!\n";
+  }
+  NumEdgesRead = ReadCount;
+
+  setBranchWeightMetadata(M, PB);
+
+  return ReadCount > 0;
+}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index 63468f8426..12b59e0a6f 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -286,7 +286,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
     }
   }

-  double fraction = floor(BBWeight/Edges.size());
+  double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;

  // Finally we know what flow is still not leaving the block, distribute this
  // flow onto the empty edges.
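The estimator's distribution loop continues below. Before it, a note on the crux of setBranchWeightMetadata() above: raw edge counts become !prof branch_weights metadata that BranchProbabilityInfo already understands. Stripped of the iteration, the attachment is a few lines; a minimal sketch for one two-way branch, with the helper name and weights illustrative:

#include "llvm/InstrTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Attach !prof branch weights to a single two-way terminator.
static void annotateBranch(TerminatorInst *TI, uint32_t TrueWeight,
                           uint32_t FalseWeight) {
  SmallVector<uint32_t, 2> Weights;
  Weights.push_back(TrueWeight);
  Weights.push_back(FalseWeight);
  MDBuilder MDB(TI->getContext());
  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}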
for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 173de2c027..b5b7ac1e50 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -1016,40 +1016,14 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } } -raw_ostream& operator<<(raw_ostream &O, const Function *F) { - return O << F->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { return O << MF->getFunction()->getName() << "(MF)"; } -raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { - return O << BB->getName(); -} - raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { return O << MBB->getBasicBlock()->getName() << "(MB)"; } -raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { O << "("; diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 868f4834b7..0f9a8b3ac4 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -47,7 +47,7 @@ static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", cl::values( clEnumValN(Region::PrintNone, "none", "print no details"), clEnumValN(Region::PrintBB, "bb", - "print regions in detail with block_node_iterator"), + "print regions in detail with block_iterator"), clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); @@ -246,22 +246,6 @@ void Region::verifyRegionNest() const { verifyRegion(); } -Region::block_node_iterator Region::block_node_begin() { - return GraphTraits<FlatIt<Region*> >::nodes_begin(this); -} - -Region::block_node_iterator Region::block_node_end() { - return GraphTraits<FlatIt<Region*> >::nodes_end(this); -} - -Region::const_block_node_iterator Region::block_node_begin() const { - return GraphTraits<FlatIt<const Region*> >::nodes_begin(this); -} - -Region::const_block_node_iterator Region::block_node_end() const { - return GraphTraits<FlatIt<const Region*> >::nodes_end(this); -} - Region::element_iterator Region::element_begin() { return GraphTraits<Region*>::nodes_begin(this); } @@ -425,10 +409,8 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2 + 2); if (Style == PrintBB) { - for (const_block_node_iterator I = block_node_begin(), - E = block_node_end(); - I != E; ++I) - OS << **I << ", "; // TODO: remove the last "," + for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I) + OS << (*I)->getName() << ", "; // TODO: remove the last "," } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last ", @@ -445,9 +427,11 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2) << "} \n"; } +#ifndef NDEBUG void Region::dump() const { print(dbgs(), true, getDepth(), printStyle.getValue()); } +#endif void Region::clearNodeCache() { // Free the cached nodes. 
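The RegionInfo change above retires block_node_iterator in favor of the flat block_iterator, which yields BasicBlock* directly, so printing loops no longer have to unwrap a RegionNode. A sketch of the updated idiom, mirroring the loops in the patch (the helper name is illustrative):

#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// block_iterator yields BasicBlock* directly; no getEntry() indirection.
static void listRegionBlocks(Region *R) {
  for (Region::block_iterator I = R->block_begin(), E = R->block_end();
       I != E; ++I)
    errs() << (*I)->getName() << "\n";
}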
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index c97b5ebd7d..9208fa21d7 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -195,10 +195,9 @@ public: virtual bool runOnRegion(Region *R, RGPassManager &RGM) { Out << Banner; - for (Region::block_node_iterator I = R->block_node_begin(), - E = R->block_node_end(); + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); I != E; ++I) - (*I)->getEntry()->print(Out); + (*I)->print(Out); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index a654648578..84e147bf8e 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -122,10 +122,12 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // +#ifndef NDEBUG void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { switch (getSCEVType()) { diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index ff5010bad7..dbb9535728 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -43,9 +43,11 @@ void Trace::print(raw_ostream &O) const { O << "; Trace parent function: \n" << *F; } +#ifndef NDEBUG /// dump - Debugger convenience method; writes trace to standard error /// output stream. /// void Trace::dump() const { print(dbgs()); } +#endif diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index cea34e16e7..491224a4b6 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1614,7 +1614,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // right. unsigned PtrSize = TD.getPointerSizeInBits(); if (PtrSize < 64) - Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + Offset = SignExtend64(Offset, PtrSize); return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); } diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index 55684f7023..639f5ac269 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -66,7 +66,7 @@ namespace llvm { fmag[1] = '\n'; } - bool checkSignature() { + bool checkSignature() const { return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2); } }; diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 5cfc8109be..5052495c0d 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -79,7 +79,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) } // Cast archive member header - ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; + const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); int flags = 0; @@ -196,7 +196,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) /* FALL THROUGH */ default: - char* slash = (char*) memchr(Hdr->name, '/', 16); + const char* slash = (const char*) memchr(Hdr->name, '/', 16); if (slash == 0) slash = Hdr->name + 16; pathname.assign(Hdr->name, slash - Hdr->name); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index e045804594..6e61665443 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -510,6 +510,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(asm); KEYWORD(sideeffect); KEYWORD(alignstack); + KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(ccc); @@ -554,7 +555,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); - 
KEYWORD(ia_nsdialect); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index a9c7e98964..b0b64d89d9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -962,7 +962,6 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; - case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -2070,16 +2069,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_asm: { // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT - bool HasSideEffect, AlignStack; + bool HasSideEffect, AlignStack, AsmDialect; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || ParseOptionalToken(lltok::kw_alignstack, AlignStack) || + ParseOptionalToken(lltok::kw_inteldialect, AsmDialect) || ParseStringConstant(ID.StrVal) || ParseToken(lltok::comma, "expected comma in inline asm expression") || ParseToken(lltok::StringConstant, "expected constraint string")) return true; ID.StrVal2 = Lex.getStrVal(); - ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1); + ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1) | + (unsigned(AsmDialect)<<2); ID.Kind = ValID::t_InlineAsm; return false; } @@ -2496,7 +2497,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1); + V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, + (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2))); return false; } case ValID::t_MDNode: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 9fd63f2e9a..37cbf3003e 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -72,6 +72,7 @@ namespace lltok { kw_asm, kw_sideeffect, kw_alignstack, + kw_inteldialect, kw_gc, kw_c, @@ -107,7 +108,6 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, - kw_ia_nsdialect, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 21130a1438..9130ddef16 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1245,7 +1245,9 @@ bool BitcodeReader::ParseConstants() { V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } - case bitc::CST_CODE_INLINEASM: { + // This maintains backward compatibility, pre-asm dialect keywords. + // FIXME: Remove with the 4.0 release. + case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return Error("Invalid INLINEASM record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; @@ -1266,6 +1268,31 @@ bool BitcodeReader::ParseConstants() { AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } + // This version adds support for the asm dialect keywords (e.g., + // inteldialect). 
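The parser above folds all three inline-asm flags into the single UIntVal it already carried: bit 0 is sideeffect, bit 1 alignstack, and bit 2 the new dialect bit (0 = ATT, 1 = Intel). The bitcode reader and writer below use the same layout. A standalone sketch of the encode/decode round trip:

#include <cassert>

int main() {
  // Encode exactly as LLParser::ParseValID does above.
  bool HasSideEffect = true, AlignStack = false, AsmDialect = true; // Intel
  unsigned UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack) << 1) |
                     (unsigned(AsmDialect) << 2);

  // Decode as ConvertValIDToValue and the bitcode reader do.
  assert((UIntVal & 1) == 1);        // sideeffect
  assert(((UIntVal >> 1) & 1) == 0); // no alignstack
  assert((UIntVal >> 2) == 1);       // inteldialect (0 = ATT, 1 = Intel)
  return 0;
}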
+ case bitc::CST_CODE_INLINEASM: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0] & 1; + bool IsAlignStack = (Record[0] >> 1) & 1; + unsigned AsmDialect = Record[0] >> 2; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + PointerType *PTy = cast<PointerType>(CurTy); + V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + InlineAsm::AsmDialect(AsmDialect)); + break; + } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); Type *FnTy = getTypeByID(Record[0]); @@ -2845,7 +2872,7 @@ bool BitcodeReader::InitStream() { } bool BitcodeReader::InitStreamFromBuffer() { - const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1d2dfc3437..94ebe190d4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -814,7 +814,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | - unsigned(IA->isAlignStack()) << 1); + unsigned(IA->isAlignStack()) << 1 | + unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 64a68d3d9c..7c73383cd0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1532,10 +1532,9 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { return Base; // Truncate/sext the offset to the pointer size. - if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } + unsigned Width = TD.getPointerSizeInBits(); + if (Width < 64) + Offset = SignExtend64(Offset, Width); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index db43b06c70..f24a14f29a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -68,7 +68,8 @@ static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { } /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { assert(!Str.empty() && "Can't emit empty inline asm block"); // Remember if the buffer is nul terminated or not so we can avoid a copy. 
@@ -126,6 +127,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. @@ -363,7 +365,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocMD); + EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect()); // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 3776848e3f..0885285b1b 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -182,6 +182,12 @@ void DIEValue::dump() { void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { unsigned Size = ~0U; switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? + if (Asm->OutStreamer.hasRawTextSupport()) + Asm->OutStreamer.EmitRawText(StringRef("")); + return; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: Size = 1; break; @@ -203,6 +209,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { /// unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; case dwarf::DW_FORM_flag: // Fall thru case dwarf::DW_FORM_ref1: // Fall thru case dwarf::DW_FORM_data1: return sizeof(int8_t); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d30e5bbd8e..e58589719a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -51,6 +51,15 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { return Value; } +/// addFlag - Add a flag that is true. +void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { + if (!DD->useDarwinGDBCompat()) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, + DIEIntegerOne); + else + addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); +} + /// addUInt - Add an unsigned integer attribute data and value. 
/// void CompileUnit::addUInt(DIE *Die, unsigned Attribute, @@ -794,7 +803,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_prototyped); } break; case dwarf::DW_TAG_structure_type: @@ -825,15 +834,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(ElemDie, dwarf::DW_AT_declaration); + addFlag(ElemDie, dwarf::DW_AT_external); addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); @@ -883,7 +892,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) @@ -895,8 +904,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isObjcClassComplete()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type, - dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. @@ -929,7 +937,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // If we're a forward decl, say so. if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_declaration); // Add source line info if available. if (!CTy.isForwardDecl()) @@ -1028,8 +1036,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // AT_specification code in order to work around a bug in older // gdbs that requires the linkage name to resolve multiple template // functions. + // TODO: Remove this set of code when we get rid of the old gdb + // compatibility. StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty()) + if (!LinkageName.empty() && DD->useDarwinGDBCompat()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); @@ -1043,6 +1053,11 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return SPDie; } + // Add the linkage name if we have one. + if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + // Constructors and operators for anonymous aggregates do not have names. 
if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); @@ -1055,7 +1070,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_prototyped); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1079,7 +1094,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. @@ -1093,19 +1108,19 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { DIType ATy = DIType(DIType(Args.getElement(i))); addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_artificial); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_external); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) { addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); @@ -1168,7 +1183,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add scoping info. if (!GV.isLocalToUnit()) - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addFlag(VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(VariableDIE, GV); @@ -1193,8 +1208,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, - 1); + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); @@ -1260,7 +1274,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + addFlag(&Buffer, dwarf::DW_AT_GNU_vector); // Emit derived type. addType(&Buffer, CTy->getTypeDerivedFrom()); @@ -1333,8 +1347,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { } if (DV->isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, - dwarf::DW_FORM_flag, 1); + addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { DV->setDIE(VariableDie); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b4ff9e8d69..22401fe8fa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -176,6 +176,9 @@ public: } public: + /// addFlag - Add a flag that is true to the DIE. + void addFlag(DIE *Die, unsigned Attribute); + /// addUInt - Add an unsigned integer attribute data and value. 
/// void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index abac337042..b3fa181e4e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -54,9 +54,29 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); -static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, +namespace { + enum DefaultOnOff { + Default, Enable, Disable + }; +} + +static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, cl::desc("Output prototype dwarf accelerator tables."), - cl::init(false)); + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, + cl::desc("Compatibility with Darwin gdb."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); namespace { const char *DWARFGroupName = "DWARF Emission"; @@ -135,10 +155,25 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables for Darwin. - if (Triple(M->getTargetTriple()).isOSDarwin()) - DwarfAccelTables = true; - + // Turn on accelerator tables and older gdb compatibility + // for Darwin. + bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + if (DarwinGDBCompat == Default) { + if (isDarwin) + isDarwinGDBCompat = true; + else + isDarwinGDBCompat = false; + } else + isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; + + if (DwarfAccelTables == Default) { + if (isDarwin) + hasDwarfAccelTables = true; + else + hasDwarfAccelTables = false; + } else + hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -282,7 +317,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -294,7 +329,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DIType ATy = DIType(DIType(Args.getElement(i))); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addFlag(Arg, dwarf::DW_AT_artificial); SPDie->addChild(Arg); } DIE *SPDeclDie = SPDie; @@ -603,7 +638,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) - NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) @@ -844,8 +879,8 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit info into a dwarf accelerator table sections. 
- if (DwarfAccelTables) { + // Emit info into the dwarf accelerator table sections. + if (useDwarfAccelTables()) { emitAccelNames(); emitAccelObjC(); emitAccelNamespaces(); @@ -853,7 +888,10 @@ void DwarfDebug::endModule() { } // Emit info into a debug pubtypes section. - emitDebugPubTypes(); + // TODO: When we don't need the option anymore we can + // remove all of the code that adds to the table. + if (useDarwinGDBCompat()) + emitDebugPubTypes(); // Emit info into a debug loc section. emitDebugLoc(); @@ -868,7 +906,11 @@ void DwarfDebug::endModule() { emitDebugMacInfo(); // Emit inline info. - emitDebugInlineInfo(); + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); // Emit info into a debug str section. emitDebugStr(); @@ -1467,8 +1509,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) - TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), MMI->getFrameMoves())); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index b16e947476..4f1c9a4b09 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -307,6 +307,9 @@ class DwarfDebug { // table for the same directory as DW_at_comp_dir. StringRef CompilationDir; + // A holder for the DarwinGDBCompat flag so that the compile unit can use it. + bool isDarwinGDBCompat; + bool hasDwarfAccelTables; private: /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -529,6 +532,11 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); + + /// useDarwinGDBCompat - returns whether or not to limit some of our debug + /// output to the limitations of darwin gdb. + bool useDarwinGDBCompat() { return isDarwinGDBCompat; } + bool useDwarfAccelTables() { return hasDwarfAccelTables; } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 75f6056c44..fe9e493609 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -43,26 +43,6 @@ protected: /// MMI - Collected machine module information. MachineModuleInfo *MMI; - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. 
This
-  /// chain is terminated with a next action of zero. If no type id is
-  /// found the frame is unwound and handling continues.
-  /// 3. Type id table contains references to all the C++ typeinfo for all
-  /// catches in the function. This tables is reversed indexed base 1.
-  ///
  /// SharedTypeIds - How many leading type ids two landing pads have in common.
  static unsigned SharedTypeIds(const LandingPadInfo *L,
                                const LandingPadInfo *R);
@@ -119,6 +99,26 @@ protected:
                               const RangeMapType &PadMap,
                               const SmallVectorImpl<const LandingPadInfo *> &LPs,
                               const SmallVectorImpl<unsigned> &FirstActions);
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  ///  1. The landing pad site information describes the range of code covered
+  ///     by the try. In our case it's an accumulation of the ranges covered
+  ///     by the invokes in the try. There is also a reference to the landing
+  ///     pad that handles the exception once processed. Finally an index into
+  ///     the actions table.
+  ///  2. The action table, in our case, is composed of pairs of type ids
+  ///     and next action offset. Starting with the action index from the
+  ///     landing pad site, each type Id is checked for a match to the current
+  ///     exception. If it matches then the exception and type id are passed
+  ///     on to the landing pad. Otherwise the next action is looked up. This
+  ///     chain is terminated with a next action of zero. If no type id is
+  ///     found the frame is unwound and handling continues.
+  ///  3. Type id table contains references to all the C++ typeinfo for all
+  ///     catches in the function. This table is reverse indexed, base 1.
void EmitExceptionTable(); public: diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index b9d2cfd4ed..efe022b074 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1570,8 +1570,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); } else { - if (Uses.count(Reg)) { - Uses.erase(Reg); + if (Uses.erase(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Uses.erase(*SubRegs); // Use sub-registers to be conservative } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2e189ad7e7..386509b702 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -95,6 +95,7 @@ add_llvm_library(LLVMCodeGen SplitKit.cpp StackProtector.cpp StackSlotColoring.cpp + StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 939af3f0cc..bc5258ef7d 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -9,7 +9,6 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/Function.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,8 +41,7 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); LiveIntervals &LIS = getAnalysis<LiveIntervals>(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index fb2c2e83f1..65f0941287 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -56,6 +56,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); + initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index f9347efdb0..c40c5acf11 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -18,7 +18,6 @@ #define DEBUG_TYPE "early-ifcvt" #include "MachineTraceMetrics.h" -#include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -775,8 +774,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 7a17331ba1..ffe4b63c1b 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" 
-#include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -190,8 +189,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 4214ba1242..31e36f0168 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "ifcvt" #include "BranchFolding.h" -#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -282,7 +281,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"); + << MF.getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { DEBUG(dbgs() << " skipped\n"); @@ -997,14 +996,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs, } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; - if (Redefs.count(Reg)) { + if (!Redefs.insert(Reg)) { if (AddImpUse) // Treat predicated update as read + write. MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, true/*IsImp*/,false/*IsKill*/, false/*IsDead*/,true/*IsUndef*/)); } else { - Redefs.insert(Reg); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Redefs.insert(*SubRegs); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 07e37af57f..622127cc74 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -613,7 +613,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, propagateSiblingValue(SVI); } while (!WorkList.empty()); - // Look up the value we were looking for. We already did this lokup at the + // Look up the value we were looking for. We already did this lookup at the // top of the function, but SibValues may have been invalidated. 
SVI = SibValues.find(UseVNI); assert(SVI != SibValues.end() && "Didn't compute requested info"); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index d631726538..defc1279ec 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -687,8 +687,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " - << ((Value*)mf.getFunction())->getName() - << " **********\n"); + << mf.getName() << " **********\n"); bool Changed = collectDebugValues(mf); computeIntervals(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 0a795e644c..3e9b485ca8 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "RegisterCoalescer.h" #include <algorithm> using namespace llvm; @@ -142,6 +143,48 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } +bool LiveInterval::overlaps(const LiveInterval &Other, + const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty interval"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. + const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End). 
bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
@@ -705,9 +748,11 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
   return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
}

+#ifndef NDEBUG
void LiveRange::dump() const {
  dbgs() << *this << "\n";
}
+#endif

void LiveInterval::print(raw_ostream &OS) const {
  if (empty())
@@ -740,9 +785,11 @@ void LiveInterval::print(raw_ostream &OS) const {
  }
}

+#ifndef NDEBUG
void LiveInterval::dump() const {
  dbgs() << *this << "\n";
}
+#endif

#ifndef NDEBUG
void LiveInterval::verify() const {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index d0f8ae1af3..9b81772207 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "LiveRangeCalc.h"
+#include "VirtRegMap.h"
 #include <algorithm>
 #include <limits>
 #include <cmath>
@@ -155,9 +156,11 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
   MF->print(OS, Indexes);
}

+#ifndef NDEBUG
void LiveIntervals::dumpInstrs() const {
  printInstrs(dbgs());
}
+#endif

static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
@@ -382,8 +385,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
/// which a variable is live
void LiveIntervals::computeIntervals() {
  DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
-               << "********** Function: "
-               << ((Value*)MF->getFunction())->getName() << '\n');
+               << "********** Function: " << MF->getName() << '\n');

  RegMaskBlocks.resize(MF->getNumBlockIDs());
@@ -734,12 +736,28 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Register allocator hooks.
//

-void LiveIntervals::addKillFlags() {
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+  // Keep track of regunit ranges.
+  SmallVector<std::pair<LiveInterval*, LiveInterval::iterator>, 8> RU;
+
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
     if (MRI->reg_nodbg_empty(Reg))
       continue;
     LiveInterval *LI = &getInterval(Reg);
+    if (LI->empty())
+      continue;
+
+    // Find the regunit intervals for the assigned register. They may overlap
+    // the virtual register live range, cancelling any kills.
+    RU.clear();
+    for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+         ++Units) {
+      LiveInterval *RUInt = &getRegUnit(*Units);
+      if (RUInt->empty())
+        continue;
+      RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end)));
+    }

     // Every instruction that kills Reg corresponds to a live range end point.
     for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
@@ -750,7 +768,32 @@ void LiveIntervals::addKillFlags() {
       MachineInstr *MI = getInstructionFromIndex(RI->end);
       if (!MI)
         continue;
-      MI->addRegisterKilled(Reg, NULL);
+
+      // Check if any of the regunits are live beyond the end of RI. That could
+      // happen when a physreg is defined as a copy of a virtreg:
+      //
+      //   %EAX = COPY %vreg5
+      //   FOO %vreg5            <--- MI, cancel kill because %EAX is live.
+      //   BAR %EAX<kill>
+      //
+      // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+      bool CancelKill = false;
+      for (unsigned u = 0, e = RU.size(); u != e; ++u) {
+        LiveInterval *RInt = RU[u].first;
+        LiveInterval::iterator &I = RU[u].second;
+        if (I == RInt->end())
+          continue;
+        I = RInt->advanceTo(I, RI->end);
+        if (I == RInt->end() || I->start >= RI->end)
+          continue;
+        // I is overlapping RI.
+ CancelKill = true; + break; + } + if (CancelKill) + MI->clearRegisterKills(Reg, NULL); + else + MI->addRegisterKilled(Reg, NULL); } } } @@ -1174,7 +1217,7 @@ private: SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx); if (LastUse != NewIdx) moveKillFlags(LI->reg, NewIdx, LastUse); - LR->end = LastUse.getRegSlot(); + LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber()); } void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { @@ -1188,7 +1231,7 @@ private: assert(LR->end > OldIdx && "LiveRange does not cover original slot"); moveKillFlags(LI->reg, LR->end, NewIdx); } - LR->end = NewIdx.getRegSlot(); + LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber()); } } diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d828f25932..c3ff4f1b6d 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -65,7 +65,11 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Visit all operands that read Reg. This may include partial defs. for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ++I) { - const MachineOperand &MO = I.getOperand(); + MachineOperand &MO = I.getOperand(); + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); if (!MO.readsReg()) continue; // MI is reading Reg. We may have visited MI before if it happens to be diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index cdb1776812..7f22478d01 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveRegMatrix.h" +#include "RegisterCoalescer.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -117,8 +118,9 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg) { if (VirtReg.empty()) return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) return true; return false; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 348ed3a0f9..6c94690e77 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -65,6 +65,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { } void LiveVariables::VarInfo::dump() const { +#ifndef NDEBUG dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) @@ -77,6 +78,7 @@ void LiveVariables::VarInfo::dump() const { dbgs() << "\n #" << i << ": " << *Kills[i]; dbgs() << "\n"; } +#endif } /// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. 
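A recurring mechanical change in this commit, visible again in LiveVariables just above, is wrapping dump() definitions (or their bodies) in #ifndef NDEBUG so release builds carry no dead debug printers, while print() stays available for callers that supply a stream. The shape of the idiom, reduced to a toy class as a sketch:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
struct Thing {
  void print(raw_ostream &OS) const { OS << "a thing\n"; }
  void dump() const; // declared unconditionally, defined only for debug
};
} // end anonymous namespace

// Compiled out of release (NDEBUG) builds, matching the pattern above.
#ifndef NDEBUG
void Thing::dump() const { print(dbgs()); }
#endif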
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index a7c9f46f4d..0d44fd9938 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -229,9 +229,11 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { return 0; } +#ifndef NDEBUG void MachineBasicBlock::dump() const { print(dbgs()); } +#endif StringRef MachineBasicBlock::getName() const { if (const BasicBlock *LBB = getBasicBlock()) @@ -244,7 +246,7 @@ StringRef MachineBasicBlock::getName() const { std::string MachineBasicBlock::getFullName() const { std::string Name; if (getParent()) - Name = (getParent()->getFunction()->getName() + ":").str(); + Name = (getParent()->getName() + ":").str(); if (getBasicBlock()) Name += getBasicBlock()->getName(); else diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d4aede8a7e..c282332037 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -284,12 +284,19 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +#ifndef NDEBUG void MachineFunction::dump() const { print(dbgs()); } +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ": "; + OS << "# Machine code for function " << getName() << ": "; if (RegInfo) { OS << (RegInfo->isSSA() ? "SSA" : "Post SSA"); if (!RegInfo->tracksLiveness()) @@ -334,7 +341,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { BB->print(OS, Indexes); } - OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; + OS << "\n# End machine code for function " << getName() << ".\n\n"; } namespace llvm { @@ -344,7 +351,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName().str() + "' function"; + return "CFG for '" + F->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -377,7 +384,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -387,7 +394,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -453,7 +460,9 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false, false)); + /*isSS*/ false, + /*NeedSP*/ false, + /*Alloca*/ 0)); return -++NumFixedObjects; } @@ -525,9 +534,11 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ } } +#ifndef NDEBUG void MachineFrameInfo::dump(const MachineFunction &MF) const { print(MF, dbgs()); } +#endif 
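The MachineFunction::getName() accessor added above exists mostly to shorten debug and diagnostic output; the hunks that follow convert call sites mechanically. The shape of the conversion, roughly:

    // Before: reach through to the IR function (sometimes via a Value* cast).
    DEBUG(dbgs() << "Function: " << MF->getFunction()->getName() << '\n');
    // After: one call; the assert inside getName() catches a null function.
    DEBUG(dbgs() << "Function: " << MF->getName() << '\n');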
//===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation @@ -622,7 +633,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << '\n'; } +#ifndef NDEBUG void MachineJumpTableInfo::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// @@ -749,10 +762,12 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(Value*)Constants[i].Val.ConstVal; + OS << *(const Value*)Constants[i].Val.ConstVal; OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } } +#ifndef NDEBUG void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b166849946..0508b9f612 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,7 @@ void MachineOperand::setIsDef(bool Val) { /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. if (isReg() && isOnRegUseList()) @@ -136,7 +137,8 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, RegInfo = &MF->getRegInfo(); // If this operand is already a register operand, remove it from the // register's use/def lists. - if (RegInfo && isReg()) + bool WasReg = isReg(); + if (RegInfo && WasReg) RegInfo->removeRegOperandFromUseList(this); // Change this to a register and set the reg#. @@ -153,6 +155,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsDebug = isDebug; // Ensure isOnRegUseList() returns false. Contents.Reg.Prev = 0; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; // If this operand is embedded in a function, add the operand to the // register's use/def list. @@ -208,8 +213,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { hash_code llvm::hash_value(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), - MO.getSubReg(), MO.isDef()); + // Register operands don't have target flags. 
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); case MachineOperand::MO_Immediate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); case MachineOperand::MO_CImmediate: @@ -262,7 +267,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber()) { + isInternalRead() || isEarlyClobber() || isTied()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -282,27 +287,32 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { + if (isKill()) { if (NeedComma) OS << ','; - NeedComma = false; - if (isKill()) { - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + if (NeedComma) OS << ','; + OS << "dead"; + NeedComma = true; + } + if (isUndef() && isUse()) { + if (NeedComma) OS << ','; + OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; + } + if (isTied()) { + if (NeedComma) OS << ','; + OS << "tied"; + if (TiedTo != 15) + OS << unsigned(TiedTo - 1); + NeedComma = true; } OS << '>'; } @@ -673,6 +683,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (!isImpReg && !isInlineAsm()) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; + assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); if (RegInfo) RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } @@ -708,12 +719,25 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Operands[OpNo].isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. Operands[OpNo].Contents.Reg.Prev = 0; + // Ignore existing ties. This is not a property that can be copied. + Operands[OpNo].TiedTo = 0; // Add the new operand to RegInfo. if (RegInfo) RegInfo->addRegOperandToUseList(&Operands[OpNo]); - // If the register operand is flagged as early, mark the operand as such. - if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. + if (Operands[OpNo].isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } } // Re-add all the implicit ops. @@ -730,6 +754,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + untieRegOperand(OpNo); MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. 
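With the addOperand() change above, ties declared as TIED_TO constraints in the MCInstrDesc are now established while the explicit operands are inserted, so two-address instructions come out of the builder with their tie flags already set. A sketch of the effect, assuming an x86 two-address add whose first source operand carries a TIED_TO(0) constraint (MBB, InsertPos, DL and the register names are illustrative):

    // DstReg becomes operand 0; the first addReg() creates operand 1, and
    // addOperand() ties it to operand 0 because of the TIED_TO constraint.
    MachineInstr *MI =
        BuildMI(MBB, InsertPos, DL, TII->get(X86::ADD32rr), DstReg)
            .addReg(Src1Reg)   // printed with the new <tied0> flag
            .addReg(Src2Reg);
    assert(MI->getOperand(1).isTied());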
@@ -752,6 +777,13 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + Operands.erase(Operands.begin()+OpNo); if (RegInfo) { @@ -935,6 +967,12 @@ bool MachineInstr::isStackAligningInlineAsm() const { return false; } +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo) const { assert(isInlineAsm() && "Expected an inline asm instruction"); @@ -1114,107 +1152,99 @@ int MachineInstr::findFirstPredOperandIdx() const { return -1; } -/// isRegTiedToUseOperand - Given the index of a register def operand, -/// check if the register def is tied to a source operand, due to either -/// two-address elimination or inline assembly constraints. Returns the -/// first tied use operand index by reference is UseOpIdx is not null. -bool MachineInstr:: -isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { - if (isInlineAsm()) { - assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); - const MachineOperand &MO = getOperand(DefOpIdx); - if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) - return false; - // Determine the actual operand index that corresponds to this index. - unsigned DefNo = 0; - int FlagIdx = findInlineAsmFlagIdx(DefOpIdx, &DefNo); - if (FlagIdx < 0) - return false; +// MachineOperand::TiedTo is 4 bits wide. +const unsigned TiedMax = 15; - // Which part of the group is DefOpIdx? - unsigned DefPart = DefOpIdx - (FlagIdx + 1); - - for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); - i != e; ++i) { - const MachineOperand &FMO = getOperand(i); - if (!FMO.isImm()) - continue; - if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) - continue; - unsigned Idx; - if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && - Idx == DefNo) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i + 1 + DefPart; - return true; - } - } - return false; +/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other. +/// +/// Use and def operands can be tied together, indicated by a non-zero TiedTo +/// field. TiedTo can have these values: +/// +/// 0: Operand is not tied to anything. +/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1). +/// TiedMax: Tied to an operand >= TiedMax-1. +/// +/// The tied def must be one of the first TiedMax operands on a normal +/// instruction. INLINEASM instructions allow more tied defs. +/// +void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { + MachineOperand &DefMO = getOperand(DefIdx); + MachineOperand &UseMO = getOperand(UseIdx); + assert(DefMO.isDef() && "DefIdx must be a def operand"); + assert(UseMO.isUse() && "UseIdx must be a use operand"); + assert(!DefMO.isTied() && "Def is already tied to another use"); + assert(!UseMO.isTied() && "Use is already tied to another def"); + + if (DefIdx < TiedMax) + UseMO.TiedTo = DefIdx + 1; + else { + // Inline asm can use the group descriptors to find tied operands, but on + // normal instruction, the tied def must be within the first TiedMax + // operands. 
+ assert(isInlineAsm() && "DefIdx out of range"); + UseMO.TiedTo = TiedMax; } - assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); - const MCInstrDesc &MCID = getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.isUse() && - MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) { - if (UseOpIdx) - *UseOpIdx = (unsigned)i; - return true; - } - } - return false; + // UseIdx can be out of range, we'll search for it in findTiedOperandIdx(). + DefMO.TiedTo = std::min(UseIdx + 1, TiedMax); } -/// isRegTiedToDefOperand - Return true if the operand of the specified index -/// is a register use and it is tied to an def operand. It also returns the def -/// operand index by reference. -bool MachineInstr:: -isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { - if (isInlineAsm()) { - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) - return false; +/// Given the index of a tied register operand, find the operand it is tied to. +/// Defs are tied to uses and vice versa. Returns the index of the tied operand +/// which must exist. +unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); - // Find the flag operand corresponding to UseOpIdx - int FlagIdx = findInlineAsmFlagIdx(UseOpIdx); - if (FlagIdx < 0) - return false; + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; - const MachineOperand &UFMO = getOperand(FlagIdx); - unsigned DefNo; - if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { - if (!DefOpIdx) - return true; - - unsigned DefIdx = InlineAsm::MIOp_FirstOperand; - // Remember to adjust the index. First operand is asm string, second is - // the HasSideEffects and AlignStack bits, then there is a flag for each. - while (DefNo) { - const MachineOperand &FMO = getOperand(DefIdx); - assert(FMO.isImm()); - // Skip over this def. - DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; - --DefNo; - } - *DefOpIdx = DefIdx + UseOpIdx - FlagIdx; - return true; + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. + for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; } - return false; + llvm_unreachable("Can't find tied use"); } - const MCInstrDesc &MCID = getDesc(); - if (UseOpIdx >= MCID.getNumOperands()) - return false; - const MachineOperand &MO = getOperand(UseOpIdx); - if (!MO.isReg() || !MO.isUse()) - return false; - int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO); - if (DefIdx == -1) - return false; - if (DefOpIdx) - *DefOpIdx = (unsigned)DefIdx; - return true; + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. 
+ SmallVector<unsigned, 8> GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); } /// clearKillInfo - Clears kill flags on all operands. @@ -1292,7 +1322,12 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (mayStore() || isCall()) { + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. + if (mayStore() || isCall() || + (mayLoad() && hasOrderedMemoryRef())) { SawStore = true; return false; } @@ -1308,8 +1343,8 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // load. if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the load is volatile, we can't move it. - return !SawStore && !hasVolatileMemoryRef(); + // end of block, we can't move it. + return !SawStore; return true; } @@ -1340,11 +1375,11 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, return true; } -/// hasVolatileMemoryRef - Return true if this instruction may have a -/// volatile memory reference, or if the information describing the -/// memory reference is not available. Return false if it is known to -/// have no volatile memory references. -bool MachineInstr::hasVolatileMemoryRef() const { +/// hasOrderedMemoryRef - Return true if this instruction may have an ordered +/// or volatile memory reference, or if the information describing the memory +/// reference is not available. Return false if it is known to have no ordered +/// memory references. +bool MachineInstr::hasOrderedMemoryRef() const { // An instruction known never to access memory won't have a volatile access. if (!mayStore() && !mayLoad() && @@ -1357,9 +1392,9 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (memoperands_empty()) return true; - // Check the memory reference information for volatile references. + // Check the memory reference information for ordered references. 
for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) - if ((*I)->isVolatile()) + if (!(*I)->isUnordered()) return true; return false; @@ -1461,7 +1496,9 @@ void MachineInstr::copyImplicitOps(const MachineInstr *MI) { } void MachineInstr::dump() const { +#ifndef NDEBUG dbgs() << " " << *this; +#endif } static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, @@ -1540,6 +1577,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " [sideeffect]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; FirstOp = false; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index efec481dab..169443e03d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -334,7 +334,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); - DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + DEBUG(dbgs() << MF.getName() << " ********\n"); if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 9f3829e3c0..05d2f2a885 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -74,6 +74,8 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { return BotMBB; } +#ifndef NDEBUG void MachineLoop::dump() const { print(dbgs()); } +#endif diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a1dc9481c6..4704daef02 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -252,7 +252,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getFunction()->getName() + DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; @@ -764,12 +764,14 @@ public: Queue.pop_back(); } +#ifndef NDEBUG void dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } +#endif }; /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance @@ -905,13 +907,12 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency); + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; } Top.releaseNode(SU, SU->TopReadyCycle); } @@ -925,13 +926,12 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { unsigned SuccReadyCycle = 
I->getSUnit()->BotReadyCycle; - unsigned Latency = - DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true); + unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency); + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; } Bot.releaseNode(SU, SU->BotReadyCycle); } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 852c169254..181e09ecc9 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,8 +23,9 @@ // the verifier errors. //===----------------------------------------------------------------------===// +#include "llvm/BasicBlock.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -213,6 +214,8 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void verifyInlineAsm(const MachineInstr *MI); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); @@ -357,7 +360,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { @@ -365,7 +368,7 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: BB#" << MBB->getNumber() << ' ' << MBB->getName() - << " (" << (void*)MBB << ')'; + << " (" << (const void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -695,6 +698,49 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { } } +// The operands on an INLINEASM instruction must follow a template. +// Verify that the flag operands make sense. +void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { + // The first two operands on INLINEASM are the asm string and global flags. + if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, and Extra_IsAlignStack = 2. + if (!isUInt<2>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. 
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -703,6 +749,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { << MI->getNumExplicitOperands() << " given.\n"; } + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { @@ -758,6 +808,17 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) @@ -772,6 +833,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + // Verify two-address constraints after leaving SSA form. unsigned DefIdx; if (!MRI->isSSA() && MO->isUse() && diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index d68c6740f8..82e1764d13 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -457,8 +457,8 @@ void TargetPassConfig::addMachinePasses() { const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); assert (TPI && IPI && "Pass ID not registered!"); - const char *TID = (char *)(TPI->getTypeInfo()); - const char *IID = (char *)(IPI->getTypeInfo()); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); insertPass(TID, IID); } @@ -539,6 +539,10 @@ void TargetPassConfig::addMachineSSAOptimization() { // instructions dead. 
addPass(&OptimizePHIsID); + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. addPass(&LocalStackSlotAllocationID); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 7449ff5460..6090752081 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -240,6 +240,7 @@ void SchedulePostRATDList::exitRegion() { ScheduleDAGInstrs::exitRegion(); } +#ifndef NDEBUG /// dumpSchedule - dump the scheduled Sequence. void SchedulePostRATDList::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -249,6 +250,7 @@ void SchedulePostRATDList::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); @@ -298,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":BB#" << MBB->getNumber() << " ***\n"; } #endif diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 34d075c232..e4e18c3bb5 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -137,8 +137,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" - << "********** Function: " - << ((Value*)MF.getFunction())->getName() << '\n'); + << "********** Function: " << MF.getName() << '\n'); bool Changed = false; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 3a03807ebd..8a49609552 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -20,7 +20,6 @@ #include "VirtRegMap.h" #include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -273,7 +272,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, bool RABasic::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getName() << '\n'); MF = &mf; RegAllocBase::init(getAnalysis<VirtRegMap>(), diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 6b3a48eefd..f573d419ea 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1110,8 +1110,7 @@ void RAFast::AllocateBasicBlock() { /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" - << "********** Function: " - << ((Value*)Fn.getFunction())->getName() << '\n'); + << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index d0cff481cb..c021a937b6 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -24,7 +24,6 @@ #include "VirtRegMap.h" 
#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -1746,8 +1745,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" - << "********** Function: " - << mf.getFunction()->getName() << '\n'); + << "********** Function: " << mf.getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index d0db26b208..fcdbce75d9 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -192,7 +192,6 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const MachineLoopInfo *loopInfo, const RegSet &vregs) { - typedef std::vector<const LiveInterval*> LIVector; LiveIntervals *LIS = const_cast<LiveIntervals*>(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -556,7 +555,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mri->freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); // Allocator main loop: // @@ -570,11 +569,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); +#ifndef NDEBUG const Function* func = mf->getFunction(); std::string fqn = func->getParent()->getModuleIdentifier() + "." + func->getName().str(); - (void)fqn; +#endif // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 990633440e..d018835456 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1564,8 +1564,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { Loops = &getAnalysis<MachineLoopInfo>(); DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)MF->getFunction())->getName() << '\n'); + << "********** Function: " << MF->getName() << '\n'); if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 8a6df988f1..47c3df1460 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -63,6 +63,13 @@ namespace llvm { : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). + CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. 
bool setRegisters(const MachineInstr*); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 43448c850a..6cdfe7cd72 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -63,6 +63,7 @@ void RegisterPressure::decrease(const TargetRegisterClass *RC, decreaseSetPressure(MaxSetPressure, RC, TRI); } +#ifndef NDEBUG void RegisterPressure::dump(const TargetRegisterInfo *TRI) { dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) @@ -78,6 +79,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) { << '\n'; } } +#endif /// Increase the current pressure as impacted by these physical registers and /// bump the high water mark if needed. diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 752f8e4080..af8cd8f347 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -279,6 +279,7 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +#ifndef NDEBUG /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { @@ -336,6 +337,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } dbgs() << "\n"; } +#endif #ifndef NDEBUG /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 9c1dba355b..2d8f235c66 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -209,7 +209,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); addVRegUseDeps(&ExitSU, i); @@ -225,15 +225,15 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(&ExitSU); + Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); } } } /// MO is an operand of SU's instruction that defines a physical register. Add /// data dependencies from SU to any uses of the physical register. -void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, - const MachineOperand &MO) { +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); assert(MO.isDef() && "expect physreg def"); // Ask the target if address-backscheduling is desirable, and if so how much. @@ -245,11 +245,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector<SUnit*> &UseList = Uses[*Alias]; + std::vector<PhysRegSUOper> &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i]; + SUnit *UseSU = UseList[i].SU; if (UseSU == SU) continue; + MachineInstr *UseMI = UseSU->getInstr(); + int UseOp = UseList[i].OpIdx; unsigned LDataLatency = DataLatency; // Optionally add in a special extra latency for nodes that // feed addresses. @@ -258,7 +260,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // adjustSchedDependency for the targets that care about it. 
if (SpecialAddressLatency != 0 && !UnitLatencies && UseSU != &ExitSU) { - MachineInstr *UseMI = UseSU->getInstr(); const MCInstrDesc &UseMCID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias); assert(RegUseIndex >= 0 && "UseMI doesn't use register!"); @@ -273,8 +274,15 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // perform its own adjustments. SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - unsigned Latency = computeOperandLatency(SU, UseSU, dep); + unsigned Latency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, SU->getInstr(), OperIdx, + (UseOp < 0 ? 0 : UseMI), UseOp, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); ST.adjustSchedDependency(SU, UseSU, dep); } @@ -301,9 +309,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector<SUnit *> &DefList = Defs[*Alias]; + std::vector<PhysRegSUOper> &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i]; + SUnit *DefSU = DefList[i].SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -324,14 +332,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(SU); + Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); } else { - addPhysRegDataDeps(SU, MO); + addPhysRegDataDeps(SU, OperIdx); // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's defs. - std::vector<SUnit *> &DefList = Defs[MO.getReg()]; + std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()]; // If a def is going to wrap back around to the top of the loop, // backschedule it. @@ -393,11 +401,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // the block. Instead, we leave only one call at the back of the // DefList. if (SU->isCall) { - while (!DefList.empty() && DefList.back()->isCall) + while (!DefList.empty() && DefList.back().SU->isCall) DefList.pop_back(); } // Defs are pushed in the order they are visited and never reordered. - DefList.push_back(SU); + DefList.push_back(PhysRegSUOper(SU, OperIdx)); } } @@ -468,8 +476,14 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + int DefOp = Def->findRegisterDefOperandIdx(Reg); + unsigned Latency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx); dep.setLatency(Latency); + unsigned MinLatency = + TII->computeOperandLatency(InstrItins, Def, DefOp, MI, OperIdx, + /*FindMin=*/true); + dep.setMinLatency(MinLatency); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); @@ -488,7 +502,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// (like a call or something with unmodeled side effects). 
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasVolatileMemoryRef() && + (MI->hasOrderedMemoryRef() && (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) return true; return false; @@ -997,19 +1011,10 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } } -unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - const SDep& dep, - bool FindMin) const { - // For a data dependency with a known register... - if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return 1; - - return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), - Use->getInstr(), dep.getReg(), FindMin); -} - void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#ifndef NDEBUG SU->getInstr()->dump(); +#endif } std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 38feee95a5..6e781b199a 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -35,7 +34,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const ScheduleDAG *G) { - return G->MF.getFunction()->getName(); + return G->MF.getName(); } static bool renderGraphFromBottomUp() { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index e675366485..5ca22b23fe 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -89,6 +89,7 @@ void ScoreboardHazardRecognizer::Reset() { ReservedScoreboard.reset(); } +#ifndef NDEBUG void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; @@ -104,6 +105,7 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << '\n'; } } +#endif bool ScoreboardHazardRecognizer::atIssueLimit() const { if (IssueWidth == 0) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 10491d98cb..e2a64cff96 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2496,8 +2496,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // lanes of the constant together. EVT VT = Vector->getValueType(0); unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); } } @@ -5681,6 +5691,127 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // In unsafe math mode, we can fold chains of FADD's of the same value + // into multiplications. 
This transform is not safe in general because + // we are reducing the number of rounding steps. + if (DAG.getTarget().Options.UnsafeFPMath && + TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && + !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul c, x), x) -> (fmul c+1, x) + if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fmul x, c), x) -> (fmul c+1, x) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, NewCFP); + } + + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && + N0.getOperand(0) == N1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP00, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + + // (fadd x, (fmul c, x)) -> (fmul c+1, x) + if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fmul x, c)) -> (fmul c+1, x) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, NewCFP); + } + + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + + // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) + if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(1) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP10, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(1), NewCFP); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) + if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + 
N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), NewCFP); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0), + DAG.getConstantFP(4.0, VT)); + } + } + // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && @@ -5692,8 +5823,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1); } - - // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, @@ -5867,6 +5998,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); @@ -5877,6 +6009,58 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FMUL && + N0 == N2.getOperand(0) && + N2.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); + } + + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (DAG.getTarget().Options.UnsafeFPMath && + N0.getOpcode() == ISD::FMUL && N1CFP && + N0.getOperand(1).getOpcode() == ISD::ConstantFP) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), + N2); + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorkList(RHSNeg.getNode()); + return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + // (fma x, c, x) -> (fmul x, (c+1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, VT))); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, + N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, VT))); + } + + return SDValue(); } @@ -6246,6 +6430,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } } + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + 
N0.getOperand(0), + DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + N0.getOperand(1))); + } + } + return SDValue(); } @@ -7876,29 +8071,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // If the element type of the input vector is not the same as - // the output element type, make concat_vectors based on input element - // type and then bitcast it to the output vector type. - // - // In another words avoid nodes like this: - // <NODE> v16i8 = concat_vectors v4i16 v4i16 - // Replace it with this one: - // <NODE0> v8i16 = concat_vectors v4i16 v4i16 - // <NODE1> v16i8 = bitcast NODE0 - EVT ItemType = VecIn1.getValueType().getVectorElementType(); - if (ItemType != VT.getVectorElementType()) { - EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), - ItemType, - VecIn1.getValueType().getVectorNumElements()*2); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); - VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); - } else - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); - + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } // If VecIn2 is unused then change it to undef. @@ -8754,7 +8929,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { // to alias with anything but itself. Provides base object and offset as // results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, void *&CV) { + const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = 0; CV = 0; @@ -8779,8 +8954,8 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, // for ConstantSDNodes since the same constant pool entry may be represented // by multiple nodes with different offsets. if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { - CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() - : (void *)C->getConstVal(); + CV = C->isMachineConstantPoolEntry() ? 
(const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); Offset += C->getOffset(); return false; } @@ -8805,7 +8980,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; - void *CV1, *CV2; + const void *CV1, *CV2; bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 3e18ea7ac9..b2a2a5cb25 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -97,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP); + MayNeedSP, AI); } for (; BB != EB; ++BB) diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4488d2790b..6d2cdeabd1 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -55,7 +55,8 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { /// /// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding /// the chain and glue. These operands may be implicit on the machine instr. -static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; @@ -63,7 +64,8 @@ static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { --N; // Ignore chain if it exists. // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. - for (unsigned I = N; I; --I) { + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) @@ -720,7 +722,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumImpUses = 0; - unsigned NodeOperands = countOperands(Node, NumImpUses); + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -870,6 +873,17 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; } + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) @@ -890,19 +904,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, getZExtValue(); MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + // Remember the operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + // Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + GroupIdx.push_back(MI->getNumOperands()); MI->addOperand(MachineOperand::CreateImm(Flags)); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast @@ -913,7 +931,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: - for (; NumVals; --NumVals, ++i) { + for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), @@ -928,9 +946,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_Mem: // Addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. - for (; NumVals; --NumVals, ++i) + for (unsigned j = 0; j != NumVals; ++j, ++i) AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MI->tieOperands(DefIdx + j, UseIdx + j); + } + } break; } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 908ebb9486..7b341700b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2042,7 +2042,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, DebugLoc dl) { - if (Op0.getValueType() == MVT::i32) { + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion // Get the stack frame index of a 8 byte buffer. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 94fc9761ec..37f0e60087 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -625,6 +625,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VSETCC(SDNode* N); + SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 704f99bcf0..22f8d51ab2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -64,6 +64,7 @@ class VectorLegalizer { // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. 
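// A self-contained scalar model of the XOR/AND/OR expansion named above,
// assuming a mask lane that is either all-ones or all-zeros: the NOT is an
// XOR against all-ones, and select(m, a, b) becomes (a & m) | (b & ~m).
#include <cassert>
#include <cstdint>

static uint32_t blendLane(uint32_t Mask, uint32_t A, uint32_t B) {
  uint32_t NotMask = Mask ^ 0xFFFFFFFFu; // XOR with all-ones, i.e. ~Mask
  return (A & Mask) | (B & NotMask);     // one AND per operand, one OR
}

int main() {
  assert(blendLane(0xFFFFFFFFu, 7, 9) == 7); // all-ones mask: first operand
  assert(blendLane(0x00000000u, 7, 9) == 9); // all-zeros mask: second operand
  return 0;
}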
SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); @@ -220,6 +221,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; @@ -260,6 +262,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); + else if (Node->getOpcode() == ISD::SELECT) + Result = ExpandSELECT(Op); else if (Node->getOpcode() == ISD::UINT_TO_FP) Result = ExpandUINT_TO_FLOAT(Op); else if (Node->getOpcode() == ISD::FNEG) @@ -435,6 +439,66 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR, AND and OR. The selector bit is broadcast. + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND, OR, XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = TLI.getSetCCResultType(VT); + assert(MaskTy.isVector() && "Invalid CC type"); + assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() + && "Invalid mask size"); + + // Determine the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), + DAG.getConstant(0, BitTy)); + + // Broadcast the mask so that every element of the vector is all-ones or + // all-zeros. + SmallVector<SDValue, 8> Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers.
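// Why those bitcasts matter, in a self-contained scalar form: AND/OR are
// integer operations, so a float lane is reinterpreted as i32 bits, masked,
// and reinterpreted back. memcpy is the portable stand-in for ISD::BITCAST.
#include <cstdint>
#include <cstring>

static float selectFloatLane(bool Cond, float A, float B) {
  uint32_t Mask = Cond ? 0xFFFFFFFFu : 0u; // broadcast the selector bit
  uint32_t IA, IB, IR;
  std::memcpy(&IA, &A, sizeof IA);         // bitcast f32 -> i32
  std::memcpy(&IB, &B, sizeof IB);
  IR = (IA & Mask) | (IB & ~Mask);         // the same AND/OR blend as above
  float R;
  std::memcpy(&R, &IR, sizeof R);          // bitcast i32 -> f32
  return R;
}
// The two BITCAST nodes below do exactly this inside the DAG.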
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -449,12 +513,17 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if that's legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(true) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() + assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() && "Invalid mask size"); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4709202965..4095728ee0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1366,6 +1366,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1373,6 +1376,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SetWidenedVector(SDValue(N, ResNo), Res); } +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + DebugLoc dl = N->getDebugLoc(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening.
unsigned Opcode = N->getOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index bf0a43785b..2b86e36991 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -656,6 +656,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { break; case ISD::MERGE_VALUES: case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: case ISD::CopyToReg: case ISD::CopyFromReg: case ISD::EH_LABEL: @@ -1756,6 +1758,7 @@ public: return V; } +#ifndef NDEBUG void dump(ScheduleDAG *DAG) const { // Emulate pop() without clobbering NodeQueueIds. std::vector<SUnit*> DumpQueue = Queue; @@ -1766,6 +1769,7 @@ public: SU->dump(DAG); } } +#endif }; typedef RegReductionPriorityQueue<bu_ls_rr_sort> @@ -1893,6 +1897,7 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { //===----------------------------------------------------------------------===// void RegReductionPQBase::dumpRegPressure() const { +#ifndef NDEBUG for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) { const TargetRegisterClass *RC = *I; @@ -1902,6 +1907,7 @@ void RegReductionPQBase::dumpRegPressure() const { DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] << '\n'); } +#endif } bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 748668cdf6..222dc559a2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -643,6 +643,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#ifndef NDEBUG if (!SU->getNode()) { dbgs() << "PHYS REG COPY\n"; return; @@ -659,8 +660,10 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { dbgs() << "\n"; GluedNodes.pop_back(); } +#endif } +#ifndef NDEBUG void ScheduleDAGSDNodes::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) @@ -669,6 +672,7 @@ void ScheduleDAGSDNodes::dumpSchedule() const { dbgs() << "**** NOOP ****\n"; } } +#endif #ifndef NDEBUG /// VerifyScheduledSequence - Verify that all SUnits were scheduled and that diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f4fe8927f6..928385a0ad 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1097,10 +1097,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. - EVT PTy = TLI.getPointerTy(); - unsigned BitWidth = PTy.getSizeInBits(); + unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); if (BitWidth < 64) - Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + Offset = SignExtend64(Offset, BitWidth); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -3928,12 +3927,16 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + // All atomics are load and store, except for ATOMIC_LOAD and ATOMIC_STORE.
// For now, atomics are considered to be volatile always. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -3983,17 +3986,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic store does not load; a release store "loads" in the sense - // that other stores cannot be sunk past it. + // An atomic store does not load. An atomic load does not store. // (An atomicrmw obviously both loads and stores.) - unsigned Flags = MachineMemOperand::MOStore; - if (Opcode != ISD::ATOMIC_STORE || Ordering > Monotonic) - Flags |= MachineMemOperand::MOLoad; - - // For now, atomics are considered to be volatile always. + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4056,16 +4059,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // A monotonic load does not store; an acquire load "stores" in the sense - // that other loads cannot be hoisted past it. - unsigned Flags = MachineMemOperand::MOLoad; - if (Ordering > Monotonic) - Flags |= MachineMemOperand::MOStore; - - // For now, atomics are considered to be volatile always. + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. 
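// A self-contained model of the flag selection above, with illustrative enum
// values (not LLVM's): an atomic load is not a store, an atomic store is not
// a load, and every other atomic (atomicrmw, cmpxchg) is both.
enum MemFlag { MOVolatile = 1, MOLoad = 2, MOStore = 4 };
enum AtomicKind { AK_Load, AK_Store, AK_RMW };

static unsigned memFlagsFor(AtomicKind K) {
  unsigned Flags = MOVolatile;        // atomics stay volatile for now
  if (K != AK_Store)
    Flags |= MOLoad;                  // everything but a plain store loads
  if (K != AK_Load)
    Flags |= MOStore;                 // everything but a plain load stores
  return Flags;                       // AK_RMW ends up with all three bits
}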
- Flags |= MachineMemOperand::MOVolatile; + unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, @@ -4157,6 +4161,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c814086473..9263a2a3aa 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DebugInfo.h" @@ -825,6 +826,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, GFI = gfi; LibInfo = li; TD = DAG.getTarget().getTargetData(); + Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -1765,6 +1767,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1802,8 +1805,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - addSuccessorWithWeight(SwitchBB, B.TargetBB); - addSuccessorWithWeight(SwitchBB, NextMBB); + // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. + addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); + // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. + addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1926,6 +1931,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: @@ -1959,8 +1965,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - addSuccessorWithWeight(SwitchBB, Small.BB); - addSuccessorWithWeight(SwitchBB, Default); + // Both Small and Big will jump to Small.BB, so we sum up the weights. + addSuccessorWithWeight(SwitchBB, Small.BB, + Small.ExtraWeight + Big.ExtraWeight); + addSuccessorWithWeight(SwitchBB, Default, + // The default destination is the first successor in IR. + BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1978,14 +1988,13 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } // Order cases by weight so the most likely case will be checked first. 
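// The weighting scheme introduced below, in a self-contained sketch: order
// the cases by descending weight, and as each comparison is emitted subtract
// its weight from a running total so the false edge carries the weight of
// every case still untested. CaseInfo is invented for illustration.
#include <algorithm>
#include <cstdint>
#include <vector>

struct CaseInfo { uint32_t Weight; int DestBlock; };

static bool heavierFirst(const CaseInfo &A, const CaseInfo &B) {
  return A.Weight > B.Weight;
}

static void layOutCases(std::vector<CaseInfo> &Cases) {
  std::sort(Cases.begin(), Cases.end(), heavierFirst);
  uint32_t Unhandled = 0;
  for (unsigned i = 0, e = Cases.size(); i != e; ++i)
    Unhandled += Cases[i].Weight;
  for (unsigned i = 0, e = Cases.size(); i != e; ++i) {
    Unhandled -= Cases[i].Weight;
    // Emit the compare for Cases[i] here: the taken edge is weighted
    // Cases[i].Weight, the fall-through edge is weighted Unhandled.
  }
}
// The insertion sort below implements the same ordering in place.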
- BranchProbabilityInfo *BPI = FuncInfo.BPI; + uint32_t UnhandledWeights = 0; if (BPI) { for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { - uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - I->BB->getBasicBlock()); + uint32_t IWeight = I->ExtraWeight; + UnhandledWeights += IWeight; for (CaseItr J = CR.Range.first; J < I; ++J) { - uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), - J->BB->getBasicBlock()); + uint32_t JWeight = J->ExtraWeight; if (IWeight > JWeight) std::swap(*I, *J); } @@ -2034,10 +2043,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, LHS = I->Low; MHS = SV; RHS = I->High; } - uint32_t ExtraWeight = I->ExtraWeight; + // The false weight should be the sum of all unhandled cases. + UnhandledWeights -= I->ExtraWeight; CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, /* me */ CurBlock, - /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); + /* trueweight */ I->ExtraWeight, + /* falseweight */ UnhandledWeights); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the @@ -2137,13 +2148,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } } + // Calculate weight for each unique destination in CR. + DenseMap<MachineBasicBlock*, uint32_t> DestWeights; + if (FuncInfo.BPI) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(I->BB); + if (Itr != DestWeights.end()) + Itr->second += I->ExtraWeight; + else + DestWeights[I->BB] = I->ExtraWeight; + } + // Update successor info. Add one edge to each unique successor. BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(), E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - addSuccessorWithWeight(JumpTableBB, *I); + DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr = + DestWeights.find(*I); + addSuccessorWithWeight(JumpTableBB, *I, + Itr != DestWeights.end() ? Itr->second : 0); } } @@ -2374,7 +2400,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, if (i == count) { assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); + CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); count++; } @@ -2383,6 +2409,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, uint64_t lo = (lowValue - lowBound).getZExtValue(); uint64_t hi = (highValue - lowBound).getZExtValue(); + CasesBits[i].ExtraWeight += I->ExtraWeight; for (uint64_t j = lo; j <= hi; j++) { CasesBits[i].Mask |= 1ULL << j; @@ -2410,7 +2437,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CurMF->insert(BBI, CaseBB); BTC.push_back(BitTestCase(CasesBits[i].Mask, CaseBB, - CasesBits[i].BB)); + CasesBits[i].BB, CasesBits[i].ExtraWeight)); // Put SV in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(SV); @@ -2438,30 +2465,25 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, Clusterifier TheClusterifier; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB); + TheClusterifier.add(i.getCaseValueEx(), SMBB, + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); } TheClusterifier.optimize(); - BranchProbabilityInfo *BPI = FuncInfo.BPI; size_t numCmps = 0; for (Clusterifier::RangeIterator i = TheClusterifier.begin(), e = TheClusterifier.end(); i != e; ++i, ++numCmps) { Clusterifier::Cluster &C = *i; - unsigned W = 0; - if (BPI) { - W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock()); - if (!W) - W = 16; - W *= C.first.Weight; - BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W); - } + // Update edge weight for the cluster. + unsigned W = C.first.Weight; // FIXME: Currently work with ConstantInt based numbers. // Changing it to APInt based is a pretty heavy for this commit. @@ -4853,7 +4875,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, MVT::i32)); + DAG.getIntPtrConstant(Idx)); + setValue(&I, Res); + return 0; + } + case Intrinsic::x86_avx_vextractf128_pd_256: + case Intrinsic::x86_avx_vextractf128_ps_256: + case Intrinsic::x86_avx_vextractf128_si_256: + case Intrinsic::x86_avx2_vextracti128: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * + DestVT.getVectorNumElements(); + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + DAG.getIntPtrConstant(Idx)); setValue(&I, Res); return 0; } @@ -5180,14 +5216,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { rw==1)); /* write */ return 0; } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) { + if (Intrinsic == Intrinsic::lifetime_start) + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + } + SDValue Ops[2]; + AllocaInst *LifetimeObject =dyn_cast_or_null<AllocaInst>( + GetUnderlyingObject(I.getArgOperand(1), TD)); + // Could not find an Alloca. + if (!LifetimeObject) + return 0; + int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + Ops[0] = getRoot(); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + + Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + DAG.setRoot(Res); + return 0; + } case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return 0; case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: // Discard region information. 
return 0; case Intrinsic::donothing: @@ -6065,12 +6123,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the HasSideEffect and AlignStack bits as operand 3. + // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3. unsigned ExtraInfo = 0; if (IA->hasSideEffects()) ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 4090002314..3b7615a757 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -150,9 +150,11 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; + uint32_t ExtraWeight; - CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits): - Mask(mask), BB(bb), Bits(bits) { } + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + uint32_t Weight): + Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } }; typedef std::vector<Case> CaseVector; @@ -247,11 +249,13 @@ private: typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock; struct BitTestCase { - BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr): - Mask(M), ThisBB(T), TargetBB(Tr) { } + BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, + uint32_t Weight): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; + uint32_t ExtraWeight; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; @@ -325,7 +329,7 @@ public: CodeGenOpt::Level ol) : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), - HasTailCall(false), Context(dag.getContext()) { + HasTailCall(false) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa, @@ -452,6 +456,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, + uint32_t BranchWeightToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 66f87b4ba0..bf338990ce 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -267,6 +267,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -338,7 +340,7 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; + OS << (const void*)this << ": "; for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; @@ -566,7 +568,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, child->printr(OS, G); once.insert(child); } else { // Just the address. FIXME: also print the child's opcode. 
- OS << (void*)child; + OS << (const void*)child; if (unsigned RN = N->getOperand(i).getResNo()) OS << ":" << RN; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9f277fd8b4..0337492049 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -554,7 +554,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getName().str() + ":" + + BlockName = MF->getName().str() + ":" + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber @@ -1201,7 +1201,12 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } + uint32_t UnhandledWeight = 0; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) + UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); @@ -1209,12 +1214,14 @@ SelectionDAGISel::FinishBasicBlock() { if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Cases[j+1].ThisBB, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i], SDB->BitTestCases[i].Default, + UnhandledWeight, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -1786,10 +1793,13 @@ WalkChainUsers(const SDNode *ChainedNode, User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - if (User->getOpcode() == ISD::CopyToReg || - User->getOpcode() == ISD::CopyFromReg || - User->getOpcode() == ISD::INLINEASM || - User->getOpcode() == ISD::EH_LABEL) { + unsigned UserOpcode = User->getOpcode(); + if (UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { // If their node ID got reset to -1 then they've already been selected. // Treat them like a MachineOpcode. if (User->getNodeId() == -1) @@ -2205,6 +2215,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. 
return 0; case ISD::AssertSext: @@ -2973,7 +2985,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); - Msg << "\nIn function: " << MF->getFunction()->getName(); + Msg << "\nIn function: " << MF->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 173ffac329..3921635652 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -14,7 +14,6 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -50,7 +49,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { - return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -73,7 +72,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getFunction()->getName(); + return G->getMachineFunction().getName(); } static bool renderGraphFromBottomUp() { @@ -146,7 +145,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), + ViewGraph(this, "dag." + getMachineFunction().getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6820175c1b..dcaa9ba923 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -772,7 +772,7 @@ void TargetLowering::computeRegisterProperties() { LegalIntReg = IntReg; } else { RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (MVT::SimpleValueType)LegalIntReg; + (const MVT::SimpleValueType)LegalIntReg; ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); } } @@ -898,7 +898,6 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return NULL; } - EVT TargetLowering::getSetCCResultType(EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); return PointerTy.SimpleTy; @@ -2441,7 +2440,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal; + uint64_t EqVal = 0; switch (getBooleanContents(N0.getValueType().isVector())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 21ae2f5e56..4fbe1b3605 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -159,7 +159,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
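// The debug-only gate described above, reduced to its core: an empty option
// string means "every function", otherwise only the named function is
// shrink-wrapped. Self-contained sketch; the cl::opt plumbing is omitted.
#include <string>

static bool shouldShrinkWrap(const std::string &FuncName,
                             const std::string &OnlyFunc) {
  return OnlyFunc.empty() || FuncName == OnlyFunc;
}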
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName().str(); + std::string MFName = MF->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -187,7 +187,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapThisFunction) { dbgs() << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + << MF->getName() << "\n"; }); if (calculateSets(Fn)) @@ -364,7 +364,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": uses no callee-saved registers\n"); return false; } @@ -384,7 +384,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - dbgs() << "DISABLED: " << Fn.getFunction()->getName() + dbgs() << "DISABLED: " << Fn.getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -466,7 +466,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { @@ -478,7 +478,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } @@ -505,7 +505,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + DEBUG(dbgs() << "DISABLED: " << Fn.getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; @@ -521,7 +521,7 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + dbgs() << "ENABLED: " << Fn.getName(); if (HasFastExitPath) dbgs() << " (fast exit path)"; dbgs() << "\n"; @@ -861,7 +861,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "-----------------------------------------------------------\n"; dbgs() << "total iterations = " << iterations << " ( " - << Fn.getFunction()->getName() + << Fn.getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; @@ -984,7 +984,7 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " @@ -1032,7 +1032,7 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getFunction()->getName() << ": " + DEBUG(dbgs() << MF->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp 
b/lib/CodeGen/SjLjEHPrepare.cpp index 980bd7414c..7f46a062fa 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -196,53 +196,38 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - Value *One = ConstantInt::get(Int32Ty, 1); - Value *Two = ConstantInt::get(Int32Ty, 2); - Value *Three = ConstantInt::get(Int32Ty, 3); - Value *Four = ConstantInt::get(Int32Ty, 4); - - Value *Idxs[2] = { Zero, 0 }; - for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Idxs[1] = Two; - Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data"); + Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Idxs[1] = Zero; - Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, + "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); - ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Idxs[1] = One; - Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, + "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function - Idxs[1] = Three; + IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FuncCtx, Idxs, "pers_fn_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, + "pers_fn_gep"); + Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - Idxs[1] = Four; - Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); + Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; } @@ -417,48 +402,31 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - - Value *Idxs[2] = { - ConstantInt::get(Int32Ty, 0), 0 - }; + IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "jbuf_gep", - EntryBB->getTerminator()); + Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. 
- Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); + Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); + Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); + Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + Val = Builder.CreateCall(StackAddrFn, "sp"); + Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "", EntryBB->getTerminator()); + Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); + Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); // Store a pointer to the function context so that the back-end will know // where to look for it. - Value *FuncCtxArg = CastInst::Create(Instruction::BitCast, FuncCtx, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - CallInst::Create(FuncCtxFn, FuncCtxArg, "", EntryBB->getTerminator()); + Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); + Builder.CreateCall(FuncCtxFn, FuncCtxArg); // At this point, we are all set up, update the invoke instructions to mark // their call_site values. diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index c8c3fb37ad..c98efb480c 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -143,6 +143,7 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { } +#ifndef NDEBUG void SlotIndexes::dump() const { for (IndexList::const_iterator itr = indexList.begin(); itr != indexList.end(); ++itr) { @@ -159,6 +160,7 @@ void SlotIndexes::dump() const { dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' << MBBRanges[i].second << ")\n"; } +#endif // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { @@ -168,9 +170,11 @@ void SlotIndex::print(raw_ostream &os) const { os << "invalid"; } +#ifndef NDEBUG // Dump a SlotIndex to stderr. 
void SlotIndex::dump() const { print(dbgs()); dbgs() << "\n"; } +#endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 4a2b7ec1cf..96151b6363 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -356,6 +356,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit->anyRematerializable(0); } +#ifndef NDEBUG void SplitEditor::dump() const { if (RegAssign.empty()) { dbgs() << " empty\n"; @@ -366,6 +367,7 @@ void SplitEditor::dump() const { dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); dbgs() << '\n'; } +#endif VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp new file mode 100644 index 0000000000..e1fc52d662 --- /dev/null +++ b/lib/CodeGen/StackColoring.cpp @@ -0,0 +1,657 @@ +//===-- StackColoring.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements the stack-coloring optimization that looks for +// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END), +// which represent the possible lifetime of stack slots. It attempts to +// merge disjoint stack slots and reduce the used stack space. +// NOTE: This pass is not StackSlotColoring, which optimizes spill slots. +// +// TODO: In the future we plan to improve stack coloring in the following ways: +// 1. Allow merging multiple small slots into a single larger slot at different +// offsets. +// 2. Merge this pass with StackSlotColoring and allow merging of allocas with +// spill slots.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "stackcoloring" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/DebugInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> +DisableColoring("no-stack-coloring", + cl::init(true), cl::Hidden, + cl::desc("Suppress stack coloring")); + +STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); +STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); +STATISTIC(StackSlotMerged, "Number of stack slots merged."); + +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// + +namespace { +/// StackColoring - A machine pass for merging disjoint stack allocations, +/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. +class StackColoring : public MachineFunctionPass { + MachineFrameInfo *MFI; + MachineFunction *MF; + + /// A class representing liveness information for a single basic block. + /// Each bit in the BitVector represents the liveness property + /// for a different stack slot. + struct BlockLifetimeInfo { + /// Which slots BEGIN in each basic block. + BitVector Begin; + /// Which slots END in each basic block. + BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. + BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. + BitVector LiveOut; + }; + + /// Maps active slots (per bit) for each basic block. + DenseMap<MachineBasicBlock*, BlockLifetimeInfo> BlockLiveness; + + /// Maps basic blocks to serial numbers. + DenseMap<MachineBasicBlock*, int> BasicBlocks; + /// Maps serial numbers to basic blocks. + SmallVector<MachineBasicBlock*, 8> BasicBlockNumbering; + + /// Liveness intervals for each slot. + SmallVector<LiveInterval*, 16> Intervals; + /// VNInfo is used for the construction of LiveIntervals. + VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. + SlotIndexes* Indexes; + + /// The list of lifetime markers found. These markers are to be removed + /// once the coloring is done. + SmallVector<MachineInstr*, 8> Markers; + + /// SlotSizeSorter - A sort utility for arranging stack slots according + /// to their size.
+ struct SlotSizeSorter { + MachineFrameInfo *MFI; + SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { } + bool operator()(int LHS, int RHS) { + // We use -1 to denote an uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + } +}; + +public: + static char ID; + StackColoring() : MachineFunctionPass(ID) { + initializeStackColoringPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + /// Debug. + void dump(); + + /// Removes all of the lifetime marker instructions from the function. + /// \returns true if any markers were removed. + bool removeAllMarkers(); + + /// Scan the machine function and find all of the lifetime markers. + /// Record the findings in the BEGIN and END vectors. + /// \returns the number of markers found. + unsigned collectMarkers(unsigned NumSlot); + + /// Perform the dataflow calculation and calculate the lifetime for each of + /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and + /// LifetimeLIVE_OUT maps that represent which stack slots are live coming + /// into and out of blocks. + void calculateLocalLiveness(); + + /// Construct the LiveIntervals for the slots. + void calculateLiveIntervals(unsigned NumSlots); + + /// Go over the machine function and change instructions which use stack + /// slots to use the joint slots. + void remapInstructions(DenseMap<int, int> &SlotRemap); + + /// Map entries which point to other entries to their destination. + /// A->B->C becomes A->C. + void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); +}; +} // end anonymous namespace + +char StackColoring::ID = 0; +char &llvm::StackColoringID = StackColoring::ID; + +INITIALIZE_PASS_BEGIN(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) + +void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void StackColoring::dump() { + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + unsigned Num = BasicBlocks[*FI]; + DEBUG(dbgs()<<"Inspecting block #"<<Num<<" ["<<FI->getName()<<"]\n"); + Num = 0; + DEBUG(dbgs()<<"BEGIN : {"); + for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].Begin.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"END : {"); + for (unsigned i=0; i < BlockLiveness[*FI].End.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].End.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"LIVE_IN: {"); + for (unsigned i=0; i < BlockLiveness[*FI].LiveIn.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveIn.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + DEBUG(dbgs()<<"LIVEOUT: {"); + for (unsigned i=0; i < BlockLiveness[*FI].LiveOut.size(); ++i) + DEBUG(dbgs()<<BlockLiveness[*FI].LiveOut.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + } +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) { + unsigned MarkersFound = 0; + // Scan the function to find all lifetime markers.
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a + // deterministic numbering, and because we'll need a post-order iteration + // later for solving the liveness dataflow problem. + for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); + FI != FE; ++FI) { + + // Assign a serial number to this basic block. + BasicBlocks[*FI] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(*FI); + + BlockLiveness[*FI].Begin.resize(NumSlot); + BlockLiveness[*FI].End.resize(NumSlot); + + for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end(); + BI != BE; ++BI) { + + if (BI->getOpcode() != TargetOpcode::LIFETIME_START && + BI->getOpcode() != TargetOpcode::LIFETIME_END) + continue; + + Markers.push_back(BI); + + bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &MI = BI->getOperand(0); + unsigned Slot = MI.getIndex(); + + MarkersFound++; + + const Value* Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs()<<"Found lifetime marker for allocation: "<< + Allocation->getName()<<"\n"); + } + + if (IsStart) { + BlockLiveness[*FI].Begin.set(Slot); + } else { + if (BlockLiveness[*FI].Begin.test(Slot)) { + // Allocas that start and end within a single block are handled + // specially when computing the LiveIntervals to avoid pessimizing + // the liveness propagation. + BlockLiveness[*FI].Begin.reset(Slot); + } else { + BlockLiveness[*FI].End.set(Slot); + } + } + } + } + + // Update statistics. + NumMarkerSeen += MarkersFound; + return MarkersFound; +} + +void StackColoring::calculateLocalLiveness() { + // Perform a standard reverse dataflow computation to solve for + // global liveness. The BEGIN set here is equivalent to KILL in the standard + // formulation, and END is equivalent to GEN. The result of this computation + // is a map from blocks to bitvectors where the bitvectors represent which + // allocas are live in/out of that block. + SmallPtrSet<MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); + unsigned NumSSMIters = 0; + bool changed = true; + while (changed) { + changed = false; + ++NumSSMIters; + + SmallPtrSet<MachineBasicBlock*, 8> NextBBSet; + + for (SmallVector<MachineBasicBlock*, 8>::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { + + MachineBasicBlock *BB = *PI; + if (!BBSet.count(BB)) continue; + + BitVector LocalLiveIn; + BitVector LocalLiveOut; + + // Forward propagation from begins to ends. + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + LocalLiveIn |= BlockLiveness[*PI].LiveOut; + LocalLiveIn |= BlockLiveness[BB].End; + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + // Reverse propagation from ends to begins. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + LocalLiveOut |= BlockLiveness[*SI].LiveIn; + LocalLiveOut |= BlockLiveness[BB].Begin; + LocalLiveOut.reset(BlockLiveness[BB].End); + + LocalLiveIn |= LocalLiveOut; + LocalLiveOut |= LocalLiveIn; + + // After adopting the live bits, we need to turn off the bits which + // are deactivated in this block.
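// A simplified, self-contained version of the per-block step above, using
// std::bitset in place of BitVector and a single merged predecessor and
// successor view; the real pass unions over all preds and succs and also
// cross-merges LiveIn/LiveOut. Field names mirror BlockLifetimeInfo.
#include <bitset>

struct BlockBits {
  std::bitset<8> Begin, End, LiveIn, LiveOut;
};

// Returns true if this block's LiveIn/LiveOut grew, i.e. iterate again.
static bool updateBlock(BlockBits &B, const std::bitset<8> &PredLiveOut,
                        const std::bitset<8> &SuccLiveIn) {
  std::bitset<8> In  = (PredLiveOut | B.End)   & ~B.Begin;
  std::bitset<8> Out = (SuccLiveIn  | B.Begin) & ~B.End;
  bool Grew = (In & ~B.LiveIn).any() || (Out & ~B.LiveOut).any();
  B.LiveIn  |= In;   // bits are only ever added, so the
  B.LiveOut |= Out;  // fixed-point iteration terminates
  return Grew;
}
// The reset() calls that follow perform that same turn-off in the pass.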
+ LocalLiveOut.reset(BlockLiveness[BB].End); + LocalLiveIn.reset(BlockLiveness[BB].Begin); + + if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + changed = true; + BlockLiveness[BB].LiveIn |= LocalLiveIn; + + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) + NextBBSet.insert(*PI); + } + + if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + changed = true; + BlockLiveness[BB].LiveOut |= LocalLiveOut; + + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + NextBBSet.insert(*SI); + } + } + + BBSet = NextBBSet; + }// while changed. +} + +void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector<SlotIndex, 16> Starts; + SmallVector<SlotIndex, 16> Finishes; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end(); + MBB != MBBe; ++MBB) { + Starts.clear(); + Starts.resize(NumSlots); + Finishes.clear(); + Finishes.resize(NumSlots); + + BitVector Alive = BlockLiveness[MBB].LiveIn; + Alive |= BlockLiveness[MBB].LiveOut; + + if (Alive.any()) { + for (int pos = Alive.find_first(); pos != -1; + pos = Alive.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(MBB); + Finishes[pos] = Indexes->getMBBEndIdx(MBB); + } + } + + for (SmallVector<MachineInstr*, 8>::iterator it = Markers.begin(), + e = Markers.end(); it != e; ++it) { + MachineInstr *MI = *it; + assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || + MI->getOpcode() == TargetOpcode::LIFETIME_END) && + "Invalid Lifetime marker"); + + if (MI->getParent() == MBB) { + bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; + MachineOperand &Mo = MI->getOperand(0); + int Slot = Mo.getIndex(); + assert(Slot >= 0 && "Invalid slot"); + if (IsStart) { + Starts[Slot] = Indexes->getInstructionIndex(MI); + } else { + Finishes[Slot] = Indexes->getInstructionIndex(MI); + } + } + } + + for (unsigned i = 0; i < NumSlots; ++i) { + assert(!!Starts[i] == !!Finishes[i] && "Unmatched range"); + if (Starts[i] == Finishes[i]) + continue; + + assert(Starts[i] && Finishes[i] && "Invalid interval"); + VNInfo *ValNum = Intervals[i]->getValNumInfo(0); + SlotIndex S = Starts[i]; + SlotIndex F = Finishes[i]; + if (S < F) { + // We have a single consecutive region. + Intervals[i]->addRange(LiveRange(S, F, ValNum)); + } else { + // We have two non consecutive regions. This happens when + // LIFETIME_START appears after the LIFETIME_END marker. + SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); + Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); + Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + } + } + } +} + +bool StackColoring::removeAllMarkers() { + unsigned Count = 0; + for (unsigned i = 0; i < Markers.size(); ++i) { + Markers[i]->eraseFromParent(); + Count++; + } + Markers.clear(); + + DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + return Count; +} + +void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { + unsigned FixedInstr = 0; + unsigned FixedMemOp = 0; + unsigned FixedDbg = 0; + MachineModuleInfo *MMI = &MF->getMMI(); + + // Remap debug information that refers to stack slots. 
+ MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+ for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+ VE = VMap.end(); VI != VE; ++VI) {
+ const MDNode *Var = VI->first;
+ if (!Var) continue;
+ std::pair<unsigned, DebugLoc> &VP = VI->second;
+ if (SlotRemap.count(VP.first)) {
+ DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
+ VP.first = SlotRemap[VP.first];
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const Value*, const Value*> Allocas;
+ for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
+ e = SlotRemap.end(); it != e; ++it) {
+ const Value* From = MFI->getObjectAllocation(it->first);
+ const Value* To = MFI->getObjectAllocation(it->second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+ }
+
+ // Remap all instructions to the new stack slots.
+ MachineFunction::iterator BB, BBE;
+ MachineBasicBlock::iterator I, IE;
+ for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
+ for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
+ E = I->memoperands_end(); MM != E; ++MM) {
+ MachineMemOperand *MMO = *MM;
+
+ const Value *V = MMO->getValue();
+
+ if (!V)
+ continue;
+
+ // Climb up and find the original alloca.
+ V = GetUnderlyingObject(V);
+ // If we did not find one, or if the one that we found is not in our
+ // map, then move on.
+ if (!V || !Allocas.count(V))
+ continue;
+
+ MMO->setValue(Allocas[V]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ MachineOperand &MO = I->getOperand(i);
+
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+ }
+
+ DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Flatten the remap map: if slot A was merged into B, and B was later
+ // merged into C, then A must map directly to C.
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ // If we are remapping slot i, follow the chain to its final target.
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: "
+ << ((const Value*)Func.getFunction())->getName() << '\n');
+ MF = &Func;
+ MFI = MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
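The expungeSlotMap routine above is chain flattening: if slot A was merged into B and B later into C, A must point directly at C before any instruction is rewritten. A standalone sketch of the same walk, with std::map standing in for DenseMap:

#include <cassert>
#include <map>

// Follow every remap chain to its final target (sketch of expungeSlotMap).
static void flatten(std::map<int, int> &Remap) {
  for (std::map<int, int>::iterator I = Remap.begin(), E = Remap.end();
       I != E; ++I) {
    int Target = I->second;
    // Keep walking while the target is itself remapped to another slot.
    while (Remap.count(Target))
      Target = Remap.find(Target)->second;
    I->second = Target;
  }
}

int main() {
  std::map<int, int> Remap;
  Remap[0] = 1;  // Slot 0 was merged into slot 1...
  Remap[1] = 2;  // ...and slot 1 later into slot 2.
  flatten(Remap);
  assert(Remap[0] == 2 && Remap[1] == 2);
  return 0;
}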
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+ DEBUG(dbgs()<<"Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+ // Don't continue if there are not enough lifetime markers, the stack
+ // is too small, or we are told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) {
+ DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ LiveInterval *LI = new LiveInterval(i, 0);
+ Intervals.push_back(LI);
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+ // for disjoint slots. When you find disjoint slots, merge the smaller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI));
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=0; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = Intervals[FirstSlot];
+ LiveInterval *Second = Intervals[SecondSlot];
+ assert(!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots.
+ if (!First->overlaps(*Second)) {
+ Changed = true;
+ First->MergeRangesInAsValue(*Second, First->getValNumInfo(0));
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging slots #"<<I<<" and #"<<J<<" together.\n");
+ unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+ MFI->getObjectAlignment(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots += 1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ } // While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ DEBUG(dbgs()<<"Merged "<<RemovedSlots<<" slots. Saved "<<
+ ReducedSize<<" bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
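Stripped of the LiveInterval machinery, the greedy merge above reduces to: sort by size, then fold any slot whose live range is disjoint from a larger one. A sketch with plain half-open intervals and a hypothetical Slot type (single pass; the real loop re-runs the scan until nothing changes):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Slot {
  unsigned Size;
  unsigned Start, End;  // Half-open live range [Start, End).
  bool Dead;
};

static bool overlaps(const Slot &A, const Slot &B) {
  return A.Start < B.End && B.Start < A.End;
}

static bool largerFirst(const Slot &A, const Slot &B) {
  return A.Size > B.Size;
}

int main() {
  Slot Init[] = {{16, 0, 10, false}, {8, 12, 20, false}, {4, 5, 15, false}};
  std::vector<Slot> Slots(Init, Init + 3);
  std::sort(Slots.begin(), Slots.end(), largerFirst);
  for (unsigned I = 0; I < Slots.size(); ++I) {
    if (Slots[I].Dead) continue;
    for (unsigned J = I + 1; J < Slots.size(); ++J) {
      if (Slots[J].Dead || overlaps(Slots[I], Slots[J]))
        continue;
      // Merge J into I: I's range becomes the union, J's storage is freed.
      Slots[I].Start = std::min(Slots[I].Start, Slots[J].Start);
      Slots[I].End = std::max(Slots[I].End, Slots[J].End);
      Slots[J].Dead = true;
      std::printf("merged slot of %u bytes into slot of %u bytes\n",
                  Slots[J].Size, Slots[I].Size);
    }
  }
  return 0;
}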
+ expungeSlotMap(SlotRemap, NumSlots); + remapInstructions(SlotRemap); + + // Release the intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + delete Intervals[I]; + } + + return removeAllMarkers(); +} diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 20da36e8fb..9d0fd0aa20 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -391,8 +390,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Stack Slot Coloring **********\n" - << "********** Function: " - << MF.getFunction()->getName() << '\n'; + << "********** Function: " << MF.getName() << '\n'; }); MFI = MF.getFrameInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 5b06195046..39fd600d4a 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -404,9 +404,9 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { } void StrongPHIElimination::addReg(unsigned Reg) { - if (RegNodeMap.count(Reg)) - return; - RegNodeMap[Reg] = new (Allocator) Node(Reg); + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); } StrongPHIElimination::Node* @@ -714,8 +714,9 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, assert(getRegColor(CopyReg) == CopyReg); } - if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) - InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + // Insert into map if not already there. + InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); } SrcMO.setReg(CopyReg); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index ddee6b2401..7e7f835040 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -99,17 +99,8 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, if (NewMI) { // Create a new instruction. - bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false; MachineFunction &MF = *MI->getParent()->getParent(); - if (HasDef) - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); - else - return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) - .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); + MI = MF.CloneMachineInstr(MI); } if (HasDef) { @@ -645,9 +636,16 @@ static int computeDefOperandLatency( } /// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. /// -/// FindMin may be set to get the minimum vs. expected latency. 
+/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. unsigned TargetInstrInfo:: computeOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -660,7 +658,13 @@ computeOperandLatency(const InstrItineraryData *ItinData, assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } if (OperLatency >= 0) return OperLatency; @@ -673,77 +677,3 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an -/// unknown use. Depending on the subtarget's itinerary properties, this may or -/// may not need to call getOperandLatency(). -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency. -/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const TargetRegisterInfo *TRI, - const MachineInstr *DefMI, const MachineInstr *UseMI, - unsigned Reg, bool FindMin) const { - - int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - // Find the definition of the register in the defining instruction. - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> - // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. - unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - // For all uses of the register, calculate the maxmimum latency - int OperLatency = -1; - - // UseMI is null, then it must be a scheduling barrier. - if (!UseMI) { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - else { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i); - OperLatency = std::max(OperLatency, UseCycle); - } - } - // If we found an operand latency, we're done. 
- if (OperLatency >= 0) - return OperLatency; - } - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index aa601af21b..bd12f92132 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1202,8 +1202,7 @@ bool TwoAddressInstructionPass:: collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { const MCInstrDesc &MCID = MI->getDesc(); bool AnyOps = false; - unsigned NumOps = MI->isInlineAsm() ? - MI->getNumOperands() : MCID.getNumOperands(); + unsigned NumOps = MI->getNumOperands(); for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { unsigned DstIdx = 0; @@ -1373,7 +1372,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 93840f0544..bd10a4b8d0 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -19,7 +19,6 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" #include "LiveDebugVariables.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -127,9 +126,11 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { OS << '\n'; } +#ifndef NDEBUG void VirtRegMap::dump() const { print(dbgs()); } +#endif //===----------------------------------------------------------------------===// // VirtRegRewriter @@ -197,11 +198,11 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { VRM = &getAnalysis<VirtRegMap>(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " - << MF->getFunction()->getName() << '\n'); + << MF->getName() << '\n'); DEBUG(VRM->dump()); // Add kill flags while we still have virtual registers. - LIS->addKillFlags(); + LIS->addKillFlags(VRM); // Live-in lists on basic blocks are required for physregs. 
addMBBLiveIns(); diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 441f1e86dc..1e9e509fd2 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -8,5 +8,6 @@ add_llvm_library(LLVMDebugInfo DWARFDebugAranges.cpp DWARFDebugInfoEntry.cpp DWARFDebugLine.cpp + DWARFDebugRangeList.cpp DWARFFormValue.cpp ) diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp index e2fd55fd6e..ead57f9715 100644 --- a/lib/DebugInfo/DIContext.cpp +++ b/lib/DebugInfo/DIContext.cpp @@ -18,7 +18,9 @@ DIContext *DIContext::getDWARFContext(bool isLittleEndian, StringRef abbrevSection, StringRef aRangeSection, StringRef lineSection, - StringRef stringSection) { + StringRef stringSection, + StringRef rangeSection) { return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection, - aRangeSection, lineSection, stringSection); + aRangeSection, lineSection, stringSection, + rangeSection); } diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index b27d57bef1..bdd65b77e4 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -63,7 +63,7 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, Version = debug_info_data.getU16(&offset); bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset(); Abbrevs = abbrevs; - AddrSize = debug_info_data.getU8 (&offset); + AddrSize = debug_info_data.getU8(&offset); bool versionOK = DWARFContext::isSupportedVersion(Version); bool addrSizeOK = AddrSize == 4 || AddrSize == 8; @@ -75,6 +75,15 @@ DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data, return 0; } +bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const { + // Require that compile unit is extracted. + assert(DieArray.size() > 0); + DataExtractor RangesData(Context.getRangeSection(), + Context.isLittleEndian(), AddrSize); + return RangeList.extract(RangesData, &RangeListOffset); +} + void DWARFCompileUnit::clear() { Offset = 0; Length = 0; @@ -94,7 +103,9 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << " (next CU at " << format("0x%08x", getNextCompileUnitOffset()) << ")\n"; - getCompileUnitDIE(false)->dump(OS, this, -1U); + const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false); + assert(CU && "Null Compile Unit?"); + CU->dump(OS, this, -1U); } const char *DWARFCompileUnit::getCompilationDir() { @@ -174,11 +185,11 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { addDIE(die); return 1; } - else if (depth == 0 && initial_die_array_size == 1) { + else if (depth == 0 && initial_die_array_size == 1) // Don't append the CU die as we already did that - } else { - addDIE (die); - } + ; + else + addDIE(die); const DWARFAbbreviationDeclaration *abbrDecl = die.getAbbreviationDeclarationPtr(); @@ -199,9 +210,9 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // Give a little bit of info if we encounter corrupt DWARF (our offset // should always terminate at or before the start of the next compilation // unit header). 
- if (offset > next_cu_offset) {
- fprintf (stderr, "warning: DWARF compile unit extends beyond its bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset);
- }
+ if (offset > next_cu_offset)
+ fprintf(stderr, "warning: DWARF compile unit extends beyond its "
+ "bounds cu 0x%8.8x at 0x%8.8x\n", getOffset(), offset);
 
 setDIERelations();
 return DieArray.size();
@@ -244,12 +255,21 @@
 DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
 clearDIEs(true);
 }
 
-const DWARFDebugInfoEntryMinimal*
-DWARFCompileUnit::getFunctionDIEForAddress(int64_t address) {
+DWARFDebugInfoEntryMinimal::InlinedChain
+DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) {
+ // First, find a subprogram that contains the given address (the root
+ // of the inlined chain).
 extractDIEsIfNeeded(false);
+ const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0;
 for (size_t i = 0, n = DieArray.size(); i != n; i++) {
- if (DieArray[i].addressRangeContainsAddress(this, address))
- return &DieArray[i];
+ if (DieArray[i].isSubprogramDIE() &&
+ DieArray[i].addressRangeContainsAddress(this, Address)) {
+ SubprogramDIE = &DieArray[i];
+ break;
+ }
 }
- return 0;
+ // Get the inlined chain rooted at this subprogram DIE.
+ if (!SubprogramDIE)
+ return DWARFDebugInfoEntryMinimal::InlinedChain();
+ return SubprogramDIE->getInlinedChainForAddress(this, Address);
 }
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
index b34a5965af..03e28620d4 100644
--- a/lib/DebugInfo/DWARFCompileUnit.h
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -12,6 +12,7 @@
 #include "DWARFDebugAbbrev.h"
 #include "DWARFDebugInfoEntry.h"
+#include "DWARFDebugRangeList.h"
 #include <vector>
 
 namespace llvm {
@@ -45,6 +46,11 @@ public:
 /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
 /// hasn't already been done. Returns the number of DIEs parsed at this call.
 size_t extractDIEsIfNeeded(bool cu_die_only);
+ /// extractRangeList - extracts the range list referenced by this compile
+ /// unit from the .debug_ranges section. Returns true on success.
+ /// Requires that the compile unit is already extracted.
+ bool extractRangeList(uint32_t RangeListOffset,
+ DWARFDebugRangeList &RangeList) const;
 void clear();
 void dump(raw_ostream &OS);
 uint32_t getOffset() const { return Offset; }
@@ -106,11 +112,11 @@ public:
 void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
 bool clear_dies_if_already_not_parsed);
- /// getFunctionDIEForAddress - Returns pointer to parsed subprogram DIE,
- /// address ranges of which contain the provided address,
- /// or NULL if there is no such subprogram. The pointer
- /// is valid until DWARFCompileUnit::clear() or clearDIEs() is called.
- const DWARFDebugInfoEntryMinimal *getFunctionDIEForAddress(int64_t address);
+
+ /// getInlinedChainForAddress - fetches the inlined chain for a given
+ /// address. Returns an empty chain if no subprogram contains the address.
+ DWARFDebugInfoEntryMinimal::InlinedChain getInlinedChainForAddress( + uint64_t Address); }; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 797662b083..241f55eaed 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -32,15 +32,17 @@ void DWARFContext::dump(raw_ostream &OS) { while (set.extract(arangesData, &offset)) set.dump(OS); + uint8_t savedAddressByteSize = 0; OS << "\n.debug_lines contents:\n"; for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + savedAddressByteSize = cu->getAddressByteSize(); unsigned stmtOffset = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, -1U); if (stmtOffset != -1U) { DataExtractor lineData(getLineSection(), isLittleEndian(), - cu->getAddressByteSize()); + savedAddressByteSize); DWARFDebugLine::DumpingState state(OS); DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); } @@ -54,6 +56,18 @@ void DWARFContext::dump(raw_ostream &OS) { OS << format("0x%8.8x: \"%s\"\n", lastOffset, s); lastOffset = offset; } + + OS << "\n.debug_ranges contents:\n"; + // In fact, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the last + // compile unit (there is no easy and fast way to associate address range + // list and the compile unit it describes). + DataExtractor rangesData(getRangeSection(), isLittleEndian(), + savedAddressByteSize); + offset = 0; + DWARFDebugRangeList rangeList; + while (rangeList.extract(rangesData, &offset)) + rangeList.dump(OS); } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -131,75 +145,152 @@ namespace { }; } -DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { +DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { if (CUs.empty()) parseCompileUnits(); - DWARFCompileUnit *i = std::lower_bound(CUs.begin(), CUs.end(), offset, - OffsetComparator()); - if (i != CUs.end()) - return &*i; + DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset, + OffsetComparator()); + if (CU != CUs.end()) + return &*CU; return 0; } -DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier) { +DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { // First, get the offset of the compile unit. - uint32_t cuOffset = getDebugAranges()->findAddress(address); + uint32_t CUOffset = getDebugAranges()->findAddress(Address); // Retrieve the compile unit. - DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); - if (!cu) + return getCompileUnitForOffset(CUOffset); +} + +static bool getFileNameForCompileUnit( + DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable, + uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &FileName) { + if (CU == 0 || + LineTable == 0 || + !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath, + FileName)) + return false; + if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) { + // We may still need to append compilation directory of compile unit. 
+ SmallString<16> AbsolutePath;
+ if (const char *CompilationDir = CU->getCompilationDir()) {
+ sys::path::append(AbsolutePath, CompilationDir);
+ }
+ sys::path::append(AbsolutePath, FileName);
+ FileName = AbsolutePath.str();
+ }
+ return true;
+}
+
+static bool getFileLineInfoForCompileUnit(
+ DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable,
+ uint64_t Address, bool NeedsAbsoluteFilePath, std::string &FileName,
+ uint32_t &Line, uint32_t &Column) {
+ if (CU == 0 || LineTable == 0)
+ return false;
+ // Get the index of the row we're looking for in the line table.
+ uint32_t RowIndex = LineTable->lookupAddress(Address);
+ if (RowIndex == -1U)
+ return false;
+ // Take file number and line/column from the row.
+ const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
+ if (!getFileNameForCompileUnit(CU, LineTable, Row.File,
+ NeedsAbsoluteFilePath, FileName))
+ return false;
+ Line = Row.Line;
+ Column = Row.Column;
+ return true;
+}
+
+DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
 return DILineInfo();
- SmallString<16> fileName("<invalid>");
- SmallString<16> functionName("<invalid>");
- uint32_t line = 0;
- uint32_t column = 0;
- if (specifier.needs(DILineInfoSpecifier::FunctionName)) {
- const DWARFDebugInfoEntryMinimal *function_die =
- cu->getFunctionDIEForAddress(address);
- if (function_die) {
- if (const char *name = function_die->getSubprogramName(cu))
- functionName = name;
+ std::string FileName = "<invalid>";
+ std::string FunctionName = "<invalid>";
+ uint32_t Line = 0;
+ uint32_t Column = 0;
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ // The address may correspond to an instruction in some inlined function,
+ // so we have to build the chain of inlined functions and take the
+ // name of the topmost function in it.
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() > 0) {
+ const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0];
+ if (const char *Name = TopFunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
 }
 }
- if (specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- // Get the line table for this compile unit.
- const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu);
- if (lineTable) {
- // Get the index of the row we're looking for in the line table.
- uint32_t rowIndex = lineTable->lookupAddress(address);
- if (rowIndex != -1U) {
- const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex];
- // Take file/line info from the line table.
- const DWARFDebugLine::FileNameEntry &fileNameEntry =
- lineTable->Prologue.FileNames[row.File - 1];
- fileName = fileNameEntry.Name;
- if (specifier.needs(DILineInfoSpecifier::AbsoluteFilePath) &&
- sys::path::is_relative(fileName.str())) {
- // Append include directory of file (if it is present in line table)
- // and compilation directory of compile unit to make path absolute.
- const char *includeDir = 0;
- if (uint64_t includeDirIndex = fileNameEntry.DirIdx) {
- includeDir = lineTable->Prologue
- .IncludeDirectories[includeDirIndex - 1];
- }
- SmallString<16> absFileName;
- if (includeDir == 0 || sys::path::is_relative(includeDir)) {
- if (const char *compilationDir = cu->getCompilationDir())
- sys::path::append(absFileName, compilationDir);
- }
- if (includeDir) {
- sys::path::append(absFileName, includeDir);
- }
- sys::path::append(absFileName, fileName.str());
- fileName = absFileName;
- }
- line = row.Line;
- column = row.Column;
+ if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ const DWARFDebugLine::LineTable *LineTable =
+ getLineTableForCompileUnit(CU);
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+ getFileLineInfoForCompileUnit(CU, LineTable, Address,
+ NeedsAbsoluteFilePath,
+ FileName, Line, Column);
+ }
+ return DILineInfo(StringRef(FileName), StringRef(FunctionName),
+ Line, Column);
+}
+
+DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Specifier) {
+ DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ if (!CU)
+ return DIInliningInfo();
+
+ const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.size() == 0)
+ return DIInliningInfo();
+
+ DIInliningInfo InliningInfo;
+ uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
+ const DWARFDebugLine::LineTable *LineTable = 0;
+ for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
+ const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i];
+ std::string FileName = "<invalid>";
+ std::string FunctionName = "<invalid>";
+ uint32_t Line = 0;
+ uint32_t Column = 0;
+ // Get function name if necessary.
+ if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
+ if (const char *Name = FunctionDIE.getSubroutineName(CU))
+ FunctionName = Name;
+ }
+ if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ const bool NeedsAbsoluteFilePath =
+ Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+ if (i == 0) {
+ // For the topmost frame, initialize the line table of this
+ // compile unit and fetch file/line info from it.
+ LineTable = getLineTableForCompileUnit(CU);
+ getFileLineInfoForCompileUnit(CU, LineTable, Address,
+ NeedsAbsoluteFilePath,
+ FileName, Line, Column);
+ } else {
+ // Otherwise, use the call file, call line and call column from
+ // the previous DIE in the inlined chain.
+ getFileNameForCompileUnit(CU, LineTable, CallFile,
+ NeedsAbsoluteFilePath, FileName);
+ Line = CallLine;
+ Column = CallColumn;
+ }
+ // Get the call file/line/column of the current DIE; it gives the
+ // position reported for the next (enclosing) frame.
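The frame stitching in this loop is easy to lose in the DWARF plumbing: the innermost frame takes its position from the line table at the PC, and every enclosing frame takes the DW_AT_call_file/line recorded on the frame nested inside it. A sketch with hypothetical simplified types:

#include <cstdio>
#include <string>
#include <vector>

struct ChainEntry {            // One DIE of the inlined chain, innermost first.
  std::string Name;
  unsigned CallFile, CallLine; // Where this DIE was inlined into its parent.
};

int main() {
  std::vector<ChainEntry> Chain;
  ChainEntry Leaf = {"inlined_leaf", 1, 42};  // Inlined at file#1 line 42.
  ChainEntry Root = {"caller", 0, 0};
  Chain.push_back(Leaf);
  Chain.push_back(Root);
  unsigned File = 2, Line = 7;  // From the line table at the PC.
  for (size_t i = 0; i < Chain.size(); ++i) {
    std::printf("%s at file#%u line %u\n", Chain[i].Name.c_str(), File, Line);
    File = Chain[i].CallFile;   // Position of the next (enclosing) frame.
    Line = Chain[i].CallLine;
  }
  return 0;
}

This prints "inlined_leaf at file#2 line 7" followed by "caller at file#1 line 42", matching what getInliningInfoForAddress reports.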
+ if (i + 1 < n) { + FunctionDIE.getCallerFrame(CU, CallFile, CallLine, CallColumn); } } + DILineInfo Frame(StringRef(FileName), StringRef(FunctionName), + Line, Column); + InliningInfo.addFrame(Frame); } - return DILineInfo(fileName, functionName, line, column); + return InliningInfo; } void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index e55a27e698..76339979dd 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -13,6 +13,7 @@ #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" #include "DWARFDebugLine.h" +#include "DWARFDebugRangeList.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" @@ -53,9 +54,6 @@ public: return &CUs[index]; } - /// Return the compile unit that includes an offset (relative to .debug_info). - DWARFCompileUnit *getCompileUnitForOffset(uint32_t offset); - /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); @@ -66,8 +64,10 @@ public: const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); - virtual DILineInfo getLineInfoForAddress(uint64_t address, - DILineInfoSpecifier specifier = DILineInfoSpecifier()); + virtual DILineInfo getLineInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()); + virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Specifier = DILineInfoSpecifier()); bool isLittleEndian() const { return IsLittleEndian; } @@ -76,12 +76,19 @@ public: virtual StringRef getARangeSection() = 0; virtual StringRef getLineSection() = 0; virtual StringRef getStringSection() = 0; + virtual StringRef getRangeSection() = 0; static bool isSupportedVersion(unsigned version) { return version == 2 || version == 3; } -}; +private: + /// Return the compile unit that includes an offset (relative to .debug_info). + DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); + /// Return the compile unit which contains instruction with provided + /// address. + DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address); +}; /// DWARFContextInMemory is the simplest possible implementation of a /// DWARFContext. 
It assumes all content is available in memory and stores @@ -93,19 +100,22 @@ class DWARFContextInMemory : public DWARFContext { StringRef ARangeSection; StringRef LineSection; StringRef StringSection; + StringRef RangeSection; public: DWARFContextInMemory(bool isLittleEndian, StringRef infoSection, StringRef abbrevSection, StringRef aRangeSection, StringRef lineSection, - StringRef stringSection) + StringRef stringSection, + StringRef rangeSection) : DWARFContext(isLittleEndian), InfoSection(infoSection), AbbrevSection(abbrevSection), ARangeSection(aRangeSection), LineSection(lineSection), - StringSection(stringSection) + StringSection(stringSection), + RangeSection(rangeSection) {} virtual StringRef getInfoSection() { return InfoSection; } @@ -113,6 +123,7 @@ public: virtual StringRef getARangeSection() { return ARangeSection; } virtual StringRef getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } + virtual StringRef getRangeSection() { return RangeSection; } }; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index ef470e5799..f9a34c908f 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -62,7 +62,6 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { uint32_t offset = 0; typedef std::vector<DWARFDebugArangeSet> SetCollection; - typedef SetCollection::const_iterator SetCollectionIter; SetCollection sets; DWARFDebugArangeSet set; diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 429a36c087..1bfd126a12 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -1,4 +1,4 @@ -//===-- DWARFDebugInfoEntry.cpp --------------------------------------------===// +//===-- DWARFDebugInfoEntry.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -101,7 +101,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, DataExtractor debug_info_data = cu->getDebugInfoExtractor(); uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr); - assert (fixed_form_sizes); // For best performance this should be specified! + assert(fixed_form_sizes); // For best performance this should be specified! if (abbrCode) { uint32_t offset = *offset_ptr; @@ -126,6 +126,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info. + case DW_FORM_exprloc: case DW_FORM_block: form_size = debug_info_data.getULEB128(&offset); break; @@ -150,6 +151,11 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form_size = cu->getAddressByteSize(); break; + // 0 sized form. 
+ case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -173,6 +179,7 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -188,6 +195,13 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form = debug_info_data.getULEB128(&offset); break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = Offset; return false; @@ -249,6 +263,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, switch (form) { // Blocks if inlined data that have a length field and the data // bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: form_size = debug_info_data.getULEB128(&offset); break; @@ -273,6 +288,11 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_size = cu_addr_size; break; + // 0 byte value + case DW_FORM_flag_present: + form_size = 0; + break; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -299,6 +319,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: form_size = 8; break; @@ -314,6 +335,13 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_is_indirect = true; break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + debug_info_data.getU32(offset_ptr); + else + debug_info_data.getU64(offset_ptr); + break; + default: *offset_ptr = offset; return false; @@ -336,6 +364,16 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, return false; } +bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const { + return getTag() == DW_TAG_subprogram; +} + +bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const { + uint32_t Tag = getTag(); + return Tag == DW_TAG_subprogram || + Tag == DW_TAG_inlined_subroutine; +} + uint32_t DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, const uint16_t attr, @@ -418,24 +456,31 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( return fail_value; } +bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU, + uint64_t &LowPC, uint64_t &HighPC) const { + HighPC = -1ULL; + LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL); + if (LowPC != -1ULL) + HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL); + return (HighPC != -1ULL); +} + void -DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, - DWARFDebugAranges *debug_aranges) +DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU, + DWARFDebugAranges *DebugAranges) const { if (AbbrevDecl) { - uint16_t tag = AbbrevDecl->getTag(); - if (tag == DW_TAG_subprogram) { - uint64_t hi_pc = -1ULL; - uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); - if (lo_pc != -1ULL) - hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); - if (hi_pc != -1ULL) - debug_aranges->appendRange(cu->getOffset(), lo_pc, hi_pc); + if (isSubprogramDIE()) { + uint64_t LowPC, HighPC; + if (getLowAndHighPC(CU, LowPC, HighPC)) { + DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC); + } + // FIXME: try to append ranges from .debug_ranges section. 
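As a reference point, the containment test that getLowAndHighPC feeds is half-open, because DWARF defines DW_AT_high_pc as the first address past the end of the function. A minimal sketch:

#include <cassert>
#include <stdint.h>

// Half-open [LowPC, HighPC) containment, as DWARF defines the attributes.
static bool rangeContains(uint64_t LowPC, uint64_t HighPC, uint64_t Addr) {
  return LowPC <= Addr && Addr < HighPC;
}

int main() {
  assert(rangeContains(0x1000, 0x1040, 0x103f));
  assert(!rangeContains(0x1000, 0x1040, 0x1040));  // High PC is exclusive.
  return 0;
}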
 }
 const DWARFDebugInfoEntryMinimal *child = getFirstChild();
 while (child) {
- child->buildAddressRangeTable(cu, debug_aranges);
+ child->buildAddressRangeTable(CU, DebugAranges);
 child = child->getSibling();
 }
 }
@@ -443,51 +488,90 @@
 bool
 DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
- const DWARFCompileUnit *cu, const uint64_t address) const {
- if (!isNULL() && getTag() == DW_TAG_subprogram) {
- uint64_t hi_pc = -1ULL;
- uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL);
- if (lo_pc != -1ULL)
- hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL);
- if (hi_pc != -1ULL) {
- return (lo_pc <= address && address < hi_pc);
- }
+ const DWARFCompileUnit *CU, const uint64_t Address) const {
+ if (isNULL())
+ return false;
+ uint64_t LowPC, HighPC;
+ if (getLowAndHighPC(CU, LowPC, HighPC))
+ return (LowPC <= Address && Address < HighPC);
+ // Try to get address ranges from .debug_ranges section.
+ uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U);
+ if (RangesOffset != -1U) {
+ DWARFDebugRangeList RangeList;
+ if (CU->extractRangeList(RangesOffset, RangeList))
+ return RangeList.containsAddress(CU->getBaseAddress(), Address);
 }
 return false;
 }
 
 const char*
-DWARFDebugInfoEntryMinimal::getSubprogramName(
- const DWARFCompileUnit *cu) const {
- if (isNULL() || getTag() != DW_TAG_subprogram)
+DWARFDebugInfoEntryMinimal::getSubroutineName(
+ const DWARFCompileUnit *CU) const {
+ if (!isSubroutineDIE())
 return 0;
 // Try to get mangled name if possible.
 if (const char *name =
- getAttributeValueAsString(cu, DW_AT_MIPS_linkage_name, 0))
+ getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0))
 return name;
- if (const char *name = getAttributeValueAsString(cu, DW_AT_linkage_name, 0))
+ if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0))
 return name;
- if (const char *name = getAttributeValueAsString(cu, DW_AT_name, 0))
+ if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0))
 return name;
 // Try to get name from specification DIE.
 uint32_t spec_ref =
- getAttributeValueAsReference(cu, DW_AT_specification, -1U);
+ getAttributeValueAsReference(CU, DW_AT_specification, -1U);
 if (spec_ref != -1U) {
 DWARFDebugInfoEntryMinimal spec_die;
- if (spec_die.extract(cu, &spec_ref)) {
- if (const char *name = spec_die.getSubprogramName(cu))
+ if (spec_die.extract(CU, &spec_ref)) {
+ if (const char *name = spec_die.getSubroutineName(CU))
 return name;
 }
 }
 // Try to get name from abstract origin DIE.
uint32_t abs_origin_ref = - getAttributeValueAsReference(cu, DW_AT_abstract_origin, -1U); + getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U); if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; - if (abs_origin_die.extract(cu, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubprogramName(cu)) + if (abs_origin_die.extract(CU, &abs_origin_ref)) { + if (const char *name = abs_origin_die.getSubroutineName(CU)) return name; } } return 0; } + +void DWARFDebugInfoEntryMinimal::getCallerFrame( + const DWARFCompileUnit *CU, uint32_t &CallFile, uint32_t &CallLine, + uint32_t &CallColumn) const { + CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0); + CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0); + CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0); +} + +DWARFDebugInfoEntryMinimal::InlinedChain +DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( + const DWARFCompileUnit *CU, const uint64_t Address) const { + DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain; + if (isNULL()) + return InlinedChain; + for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) { + // Append current DIE to inlined chain only if it has correct tag + // (e.g. it is not a lexical block). + if (DIE->isSubroutineDIE()) { + InlinedChain.push_back(*DIE); + } + // Try to get child which also contains provided address. + const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild(); + while (Child) { + if (Child->addressRangeContainsAddress(CU, Address)) { + // Assume there is only one such child. + break; + } + Child = Child->getSibling(); + } + DIE = Child; + } + // Reverse the obtained chain to make the root of inlined chain last. + std::reverse(InlinedChain.begin(), InlinedChain.end()); + return InlinedChain; +} diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index d5d86b9ec0..9c1b2be0a7 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #include "DWARFAbbreviationDeclaration.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -19,6 +20,7 @@ class DWARFDebugAranges; class DWARFCompileUnit; class DWARFContext; class DWARFFormValue; +class DWARFInlinedSubroutineChain; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. class DWARFDebugInfoEntryMinimal { @@ -52,6 +54,13 @@ public: uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } + + /// Returns true if DIE represents a subprogram (not inlined). + bool isSubprogramDIE() const; + /// Returns true if DIE represents a subprogram or an inlined + /// subroutine. + bool isSubroutineDIE() const; + uint32_t getOffset() const { return Offset; } uint32_t getNumAttributes() const { return !isNULL() ? AbbrevDecl->getNumAttributes() : 0; @@ -126,17 +135,40 @@ public: const uint16_t attr, int64_t fail_value) const; - void buildAddressRangeTable(const DWARFCompileUnit *cu, - DWARFDebugAranges *debug_aranges) const; - - bool addressRangeContainsAddress(const DWARFCompileUnit *cu, - const uint64_t address) const; - - // If a DIE represents a subprogram, returns its mangled name - // (or short name, if mangled is missing). This name may be fetched - // from specification or abstract origin for this subprogram. - // Returns null if no name is found. 
- const char* getSubprogramName(const DWARFCompileUnit *cu) const; + /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. + /// Returns true if both attributes are present. + bool getLowAndHighPC(const DWARFCompileUnit *CU, + uint64_t &LowPC, uint64_t &HighPC) const; + + void buildAddressRangeTable(const DWARFCompileUnit *CU, + DWARFDebugAranges *DebugAranges) const; + + bool addressRangeContainsAddress(const DWARFCompileUnit *CU, + const uint64_t Address) const; + + /// If a DIE represents a subprogram (or inlined subroutine), + /// returns its mangled name (or short name, if mangled is missing). + /// This name may be fetched from specification or abstract origin + /// for this subprogram. Returns null if no name is found. + const char* getSubroutineName(const DWARFCompileUnit *CU) const; + + /// Retrieves values of DW_AT_call_file, DW_AT_call_line and + /// DW_AT_call_column from DIE (or zeroes if they are missing). + void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile, + uint32_t &CallLine, uint32_t &CallColumn) const; + + /// InlinedChain - represents a chain of inlined_subroutine + /// DIEs, (possibly ending with subprogram DIE), all of which are contained + /// in some concrete inlined instance tree. Address range for each DIE + /// (except the last DIE) in this chain is contained in address + /// range for next DIE in the chain. + typedef SmallVector<DWARFDebugInfoEntryMinimal, 4> InlinedChain; + + /// Get inlined chain for a given address, rooted at the current DIE. + /// Returns empty chain if address is not contained in address range + /// of current DIE. + InlinedChain getInlinedChainForAddress(const DWARFCompileUnit *CU, + const uint64_t Address) const; }; } diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index d99575d800..267364adfa 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -10,6 +10,7 @@ #include "DWARFDebugLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -513,3 +514,29 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { } return index; } + +bool +DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, + bool NeedsAbsoluteFilePath, + std::string &Result) const { + if (FileIndex == 0 || FileIndex > Prologue.FileNames.size()) + return false; + const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; + const char *FileName = Entry.Name; + if (!NeedsAbsoluteFilePath || + sys::path::is_absolute(FileName)) { + Result = FileName; + return true; + } + SmallString<16> FilePath; + uint64_t IncludeDirIndex = Entry.DirIdx; + // Be defensive about the contents of Entry. + if (IncludeDirIndex > 0 && + IncludeDirIndex <= Prologue.IncludeDirectories.size()) { + const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1]; + sys::path::append(FilePath, IncludeDir); + } + sys::path::append(FilePath, FileName); + Result = FilePath.str(); + return true; +} diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index 6382b45a93..586dd7e878 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -12,6 +12,7 @@ #include "llvm/Support/DataExtractor.h" #include <map> +#include <string> #include <vector> namespace llvm { @@ -174,6 +175,13 @@ public: // Returns the index of the row with file/line info for a given address, // or -1 if there is no such row. 
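lookupAddress effectively finds the last row whose address does not exceed the query. A standalone sketch of that contract, using a hypothetical minimal Row and ignoring the end_sequence handling of the real table:

#include <algorithm>
#include <cassert>
#include <stdint.h>
#include <vector>

struct Row { uint64_t Address; uint32_t Line; };

static bool rowBeforeAddress(const Row &A, const Row &B) {
  return A.Address < B.Address;
}

// Return the index of the last row whose address is <= Addr, or -1U if
// Addr precedes the first row.
static uint32_t lookup(const std::vector<Row> &Rows, uint64_t Addr) {
  Row Key = {Addr, 0};
  std::vector<Row>::const_iterator It =
      std::upper_bound(Rows.begin(), Rows.end(), Key, rowBeforeAddress);
  if (It == Rows.begin())
    return -1U;
  return (uint32_t)(It - Rows.begin()) - 1;
}

int main() {
  std::vector<Row> Rows;
  Row R0 = {0x1000, 10}; Rows.push_back(R0);
  Row R1 = {0x1010, 12}; Rows.push_back(R1);
  Row R2 = {0x1020, 13}; Rows.push_back(R2);
  assert(lookup(Rows, 0x1015) == 1);   // Covered by the 0x1010 row.
  assert(lookup(Rows, 0x0fff) == -1U);
  return 0;
}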
 uint32_t lookupAddress(uint64_t address) const;
+
+ // Extracts a filename by its index in the filename table in the prologue.
+ // Returns true on success.
+ bool getFileNameByIndex(uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &Result) const;
+
 void dump(raw_ostream &OS) const;
 
 struct Prologue Prologue;
diff --git a/lib/DebugInfo/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARFDebugRangeList.cpp
new file mode 100644
index 0000000000..1806beee72
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugRangeList.cpp
@@ -0,0 +1,67 @@
+//===-- DWARFDebugRangeList.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFDebugRangeList.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void DWARFDebugRangeList::clear() {
+ Offset = -1U;
+ AddressSize = 0;
+ Entries.clear();
+}
+
+bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) {
+ clear();
+ if (!data.isValidOffset(*offset_ptr))
+ return false;
+ AddressSize = data.getAddressSize();
+ if (AddressSize != 4 && AddressSize != 8)
+ return false;
+ Offset = *offset_ptr;
+ while (true) {
+ RangeListEntry entry;
+ uint32_t prev_offset = *offset_ptr;
+ entry.StartAddress = data.getAddress(offset_ptr);
+ entry.EndAddress = data.getAddress(offset_ptr);
+ // Check that both values were extracted correctly.
+ if (*offset_ptr != prev_offset + 2 * AddressSize) {
+ clear();
+ return false;
+ }
+ if (entry.isEndOfListEntry())
+ break;
+ Entries.push_back(entry);
+ }
+ return true;
+}
+
+void DWARFDebugRangeList::dump(raw_ostream &OS) const {
+ for (int i = 0, n = Entries.size(); i != n; ++i) {
+ const char *format_str = (AddressSize == 4
+ ? "%08x %08" PRIx64 " %08" PRIx64 "\n"
+ : "%08x %016" PRIx64 " %016" PRIx64 "\n");
+ OS << format(format_str, Offset, Entries[i].StartAddress,
+ Entries[i].EndAddress);
+ }
+ OS << format("%08x <End of list>\n", Offset);
+}
+
+bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress,
+ uint64_t Address) const {
+ for (int i = 0, n = Entries.size(); i != n; ++i) {
+ if (Entries[i].isBaseAddressSelectionEntry(AddressSize))
+ BaseAddress = Entries[i].EndAddress;
+ else if (Entries[i].containsAddress(BaseAddress, Address))
+ return true;
+ }
+ return false;
+}
diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h
new file mode 100644
index 0000000000..4e34a916f4
--- /dev/null
+++ b/lib/DebugInfo/DWARFDebugRangeList.h
@@ -0,0 +1,78 @@
+//===-- DWARFDebugRangeList.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H
+
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+class DWARFDebugRangeList {
+public:
+ struct RangeListEntry {
+ // A beginning address offset. This address offset has the size of an
+ // address and is relative to the applicable base address of the
+ // compilation unit referencing this range list. It marks the beginning
+ // of an address range.
+ uint64_t StartAddress;
+ // An ending address offset. This address offset again has the size of
+ // an address and is relative to the applicable base address of the
+ // compilation unit referencing this range list. It marks the first
+ // address past the end of the address range. The ending address must
+ // be greater than or equal to the beginning address.
+ uint64_t EndAddress;
+ // The end of any given range list is marked by an end of list entry,
+ // which consists of a 0 for the beginning address offset
+ // and a 0 for the ending address offset.
+ bool isEndOfListEntry() const {
+ return (StartAddress == 0) && (EndAddress == 0);
+ }
+ // A base address selection entry consists of:
+ // 1. The value of the largest representable address offset
+ // (for example, 0xffffffff when the size of an address is 32 bits).
+ // 2. An address, which defines the appropriate base address for
+ // use in interpreting the beginning and ending address offsets of
+ // subsequent entries of the range list.
+ bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
+ assert(AddressSize == 4 || AddressSize == 8);
+ if (AddressSize == 4)
+ return StartAddress == -1U;
+ else
+ return StartAddress == -1ULL;
+ }
+ bool containsAddress(uint64_t BaseAddress, uint64_t Address) const {
+ return (BaseAddress + StartAddress <= Address) &&
+ (Address < BaseAddress + EndAddress);
+ }
+ };
+
+private:
+ // Offset in .debug_ranges section.
+ uint32_t Offset;
+ uint8_t AddressSize;
+ std::vector<RangeListEntry> Entries;
+
+public:
+ DWARFDebugRangeList() { clear(); }
+ void clear();
+ void dump(raw_ostream &OS) const;
+ bool extract(DataExtractor data, uint32_t *offset_ptr);
+ /// containsAddress - Returns true if the range list contains the given
+ /// address. Has to be passed the base address of the compile unit that
+ /// references this range list.
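Putting the two special entry kinds together, address lookup over a range list walks the entries while tracking the current base address. A sketch with a hypothetical simplified entry type:

#include <cassert>
#include <stdint.h>
#include <vector>

// Either a pair of offsets, or, when Start is all-ones, a base address
// selection entry whose End field is the new base.
struct Entry { uint64_t Start, End; };

static bool contains(const std::vector<Entry> &List, uint64_t Base,
                     uint64_t Address) {
  for (size_t i = 0, n = List.size(); i != n; ++i) {
    if (List[i].Start == ~0ULL) {  // Base address selection entry.
      Base = List[i].End;
      continue;
    }
    if (Base + List[i].Start <= Address && Address < Base + List[i].End)
      return true;
  }
  return false;
}

int main() {
  std::vector<Entry> List;
  Entry Sel = {~0ULL, 0x400000};  // Select base 0x400000.
  Entry Rng = {0x10, 0x20};       // Covers [0x400010, 0x400020).
  List.push_back(Sel);
  List.push_back(Rng);
  assert(contains(List, 0, 0x400018));
  assert(!contains(List, 0, 0x400020));
  return 0;
}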
+ bool containsAddress(uint64_t BaseAddress, uint64_t Address) const; +}; + +} // namespace llvm + +#endif // LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index ee2a3ab7b7..c9ecbbbbd4 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -41,6 +41,10 @@ static const uint8_t form_sizes_addr4[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect + 4, // 0x17 DW_FORM_sec_offset + 0, // 0x18 DW_FORM_exprloc + 0, // 0x19 DW_FORM_flag_present + 8, // 0x20 DW_FORM_ref_sig8 }; static const uint8_t form_sizes_addr8[] = { @@ -67,6 +71,10 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect + 8, // 0x17 DW_FORM_sec_offset + 0, // 0x18 DW_FORM_exprloc + 0, // 0x19 DW_FORM_flag_present + 8, // 0x20 DW_FORM_ref_sig8 }; const uint8_t * @@ -93,6 +101,7 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, case DW_FORM_ref_addr: Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); break; + case DW_FORM_exprloc: case DW_FORM_block: Value.uval = data.getULEB128(offset_ptr); is_block = true; @@ -141,12 +150,24 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, // Set the string value to also be the data for inlined cstr form // values only so we can tell the differnence between DW_FORM_string // and DW_FORM_strp form values - Value.data = (uint8_t*)Value.cstr; + Value.data = (const uint8_t*)Value.cstr; break; case DW_FORM_indirect: Form = data.getULEB128(offset_ptr); indirect = true; break; + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + Value.uval = data.getU32(offset_ptr); + else + Value.uval = data.getU64(offset_ptr); + break; + case DW_FORM_flag_present: + Value.uval = 1; + break; + case DW_FORM_ref_sig8: + Value.uval = data.getU64(offset_ptr); + break; default: return false; } @@ -179,6 +200,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, switch (form) { // Blocks if inlined data that have a length field and the data bytes // inlined in the .debug_info + case DW_FORM_exprloc: case DW_FORM_block: { uint64_t size = debug_info_data.getULEB128(offset_ptr); *offset_ptr += size; @@ -211,6 +233,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, *offset_ptr += cu->getAddressByteSize(); return true; + // 0 byte values - implied from the form. + case DW_FORM_flag_present: + return true; + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -234,6 +260,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: + case DW_FORM_ref_sig8: *offset_ptr += 8; return true; @@ -249,6 +276,15 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, indirect = true; form = debug_info_data.getULEB128(offset_ptr); break; + + // 4 for DWARF32, 8 for DWARF64. 
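One caveat worth noting: per the DWARF spec, the width of a section offset is a property of the 32- vs 64-bit DWARF format, signalled by the unit length field, not of the target address size; the code above approximates it with the address byte size, which happens to match in the common cases. A sketch of the distinction, using a hypothetical header type:

#include <stdio.h>
#include <stdint.h>

struct UnitHeader {
  bool IsDWARF64;    // Unit length began with the 0xffffffff escape.
  uint8_t AddrSize;  // From the compile unit header.
  unsigned sectionOffsetSize() const { return IsDWARF64 ? 8 : 4; }
};

int main() {
  // A DWARF32 file targeting a 64-bit machine: 8-byte addresses, but
  // DW_FORM_sec_offset values are still 4 bytes wide.
  UnitHeader H = { false, 8 };
  printf("addr size %u, DW_FORM_sec_offset size %u\n",
         (unsigned)H.AddrSize, H.sectionOffsetSize());
  return 0;
}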
+ case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + *offset_ptr += 4; + else + *offset_ptr += 8; + return true; + default: return false; } @@ -264,22 +300,26 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_ref_sig8: case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; OS.write_escaped(getAsCString(NULL)); OS << '"'; break; + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: if (uvalue > 0) { switch (Form) { + case DW_FORM_exprloc: case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; @@ -342,6 +382,14 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { case DW_FORM_indirect: OS << "DW_FORM_indirect"; break; + + case DW_FORM_sec_offset: + if (cu->getAddressByteSize() == 4) + OS << format("0x%08x", (uint32_t)uvalue); + else + OS << format("0x%016" PRIx64, uvalue); + break; + default: OS << format("DW_FORM(0x%4.4x)", Form); break; @@ -404,6 +452,7 @@ const uint8_t *DWARFFormValue::BlockData() const { bool DWARFFormValue::isBlockForm(uint16_t form) { switch (form) { + case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h index 22ac011664..c5b590db95 100644 --- a/lib/DebugInfo/DWARFFormValue.h +++ b/lib/DebugInfo/DWARFFormValue.h @@ -52,7 +52,7 @@ public: bool extractValue(DataExtractor data, uint32_t *offset_ptr, const DWARFCompileUnit *cu); bool isInlinedCStr() const { - return Value.data != NULL && Value.data == (uint8_t*)Value.cstr; + return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr; } const uint8_t *BlockData() const; uint64_t getReference(const DWARFCompileUnit* cu) const; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 4afc900f64..ba0aeca58d 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -833,7 +833,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes) { assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); - uint8_t *Src = (uint8_t *)IntVal.getRawData(); + const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); if (sys::isLittleEndianHost()) { // Little-endian host - the source is ordered from LSB to MSB. 
Order the diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index d8bbc01d3e..b19fc6fa97 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -1138,7 +1138,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, void JITEmitter::startFunction(MachineFunction &F) { DEBUG(dbgs() << "JIT: Starting CodeGen of Function " - << F.getFunction()->getName() << "\n"); + << F.getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already @@ -1287,7 +1287,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { PrevDL = DebugLoc(); DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart - << "] Function: " << F.getFunction()->getName() + << "] Function: " << F.getName() << ": " << (FnEnd-FnStart) << " bytes of text, " << Relocations.size() << " relocations\n"); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 99c65ecf95..fa71305145 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -113,6 +113,11 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: This should really return a uint64_t since it's a pointer in the + // target address space, not our local address space. That's part of the + // ExecutionEngine interface, though. Fix that when the old JIT finally + // dies. + // FIXME: Add support for per-module compilation state if (!isCompiled) emitObject(M); @@ -126,10 +131,13 @@ void *MCJIT::getPointerToFunction(Function *F) { // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. + // + // This is the accessor for the target address, so make sure to check the + // load address of the symbol, not the local address. 
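+  // (In RuntimeDyld terms, the load address is where the section is mapped
+  // in the executing process, which may be a remote target; the plain
+  // getSymbolAddress() returns the local buffer that merely holds the bits.)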
  StringRef BaseName = F->getName();
  if (BaseName[0] == '\1')
-    return (void*)Dyld.getSymbolAddress(BaseName.substr(1));
-  return (void*)Dyld.getSymbolAddress((TM->getMCAsmInfo()->getGlobalPrefix()
+    return (void*)Dyld.getSymbolLoadAddress(BaseName.substr(1));
+  return (void*)Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix()
                                        + BaseName).str());
 }
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a98ddc0e12..d47287b878 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -479,6 +479,10 @@ void *RuntimeDyld::getSymbolAddress(StringRef Name) {
   return Dyld->getSymbolAddress(Name);
 }
 
+uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) {
+  return Dyld->getSymbolLoadAddress(Name);
+}
+
 void RuntimeDyld::resolveRelocations() {
   Dyld->resolveRelocations();
 }
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 3d8999437c..d5df732b91 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -177,6 +177,10 @@ protected:
     return true;
   }
 
+  uint64_t getSectionLoadAddress(unsigned SectionID) {
+    return Sections[SectionID].LoadAddress;
+  }
+
   uint8_t *getSectionAddress(unsigned SectionID) {
     return (uint8_t*)Sections[SectionID].Address;
   }
@@ -223,7 +227,10 @@ protected:
   void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
 
   /// \brief A object file specific relocation resolver
-  /// \param Address Address to apply the relocation action
+  /// \param LocalAddress The address to apply the relocation action
+  /// \param FinalAddress If the linker prepares code for remote execution,
+  ///                     FinalAddress is the remote address to apply the
+  ///                     relocation action; otherwise it equals LocalAddress
   /// \param Value Target symbol address to apply the relocation action
   /// \param Type object file specific relocation type
   /// \param Addend A constant addend used to compute the value to be stored
@@ -267,6 +274,15 @@ public:
     return getSectionAddress(Loc.first) + Loc.second;
   }
 
+  uint64_t getSymbolLoadAddress(StringRef Name) {
+    // FIXME: Just look up as a function for now. Overly simple of course.
+    // Work in progress.
+    if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end())
+      return 0;
+    SymbolLoc Loc = GlobalSymbolTable.lookup(Name);
+    return getSectionLoadAddress(Loc.first) + Loc.second;
+  }
+
   void resolveRelocations();
 
   void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 6a3e16d985..b9d2a88485 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -270,9 +270,10 @@ class ELFObjectWriter : public MCObjectWriter {
 
     /// ComputeSymbolTable - Compute the symbol table data
     ///
-    /// \param StringTable [out] - The string table data.
-    /// \param StringIndexMap [out] - Map from symbol names to offsets in the
-    /// string table.
+    /// \param Asm - The assembler.
+    /// \param SectionIndexMap - Maps a section to its index.
+    /// \param RevGroupMap - Maps a signature symbol to the group section.
+    /// \param NumRegularSections - Number of non-relocation sections.
    void ComputeSymbolTable(MCAssembler &Asm,
                            const SectionIndexMapTy &SectionIndexMap,
                            RevGroupMapTy RevGroupMap,
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e731d95916..c270c2cd34 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -166,7 +166,7 @@ public:
   ///
   /// @param Symbol - The common symbol to emit.
   /// @param Size - The size of the common symbol.
-  /// @param Size - The alignment of the common symbol in bytes.
+  /// @param ByteAlignment - The alignment of the common symbol in bytes.
   virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                      unsigned ByteAlignment);
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 7f8469fbd3..02bd6deb62 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -459,6 +459,12 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
     const MCAlignFragment &AF = cast<MCAlignFragment>(F);
     unsigned Offset = Layout.getFragmentOffset(&AF);
     unsigned Size = OffsetToAlignment(Offset, AF.getAlignment());
+    // If we are padding with nops, force the padding to be a multiple of
+    // the minimum nop size.
+    if (Size > 0 && AF.hasEmitNops()) {
+      while (Size % getBackend().getMinimumNopSize())
+        Size += AF.getAlignment();
+    }
     if (Size > AF.getMaxBytesToEmit())
       return 0;
     return Size;
@@ -1101,6 +1107,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
 }
 
+#ifndef NDEBUG
 void MCFragment::dump() {
   raw_ostream &OS = llvm::errs();
@@ -1269,6 +1276,7 @@ void MCAssembler::dump() {
   }
   OS << "]>\n";
 }
+#endif
 
 // anchors for MC*Fragment vtables
 void MCDataFragment::anchor() { }
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 352c28dda7..ee597cfa21 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -425,9 +425,11 @@ void MCDwarfFile::print(raw_ostream &OS) const {
   OS << '"' << getName() << '"';
 }
 
+#ifndef NDEBUG
 void MCDwarfFile::dump() const {
   print(dbgs());
 }
+#endif
 
 // Utility function to write a tuple for .debug_abbrev.
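 // (Each tuple is a pair of ULEB128-encoded values, an attribute code and a
 // form code; a 0,0 pair terminates an abbreviation declaration.)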
static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 0eb7fcce68..b19665949d 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -136,10 +136,12 @@ void MCExpr::print(raw_ostream &OS) const { llvm_unreachable("Invalid expression kind!"); } +#ifndef NDEBUG void MCExpr::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /* *** */ @@ -197,7 +199,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_GOTTPOFF: return "(gottpoff)"; case VK_ARM_TLSGD: return "(tlsgd)"; case VK_ARM_TARGET1: return "(target1)"; - case VK_PPC_TOC: return "toc"; + case VK_PPC_TOC: return "tocbase"; + case VK_PPC_TOC_ENTRY: return "toc"; case VK_PPC_DARWIN_HA16: return "ha16"; case VK_PPC_DARWIN_LO16: return "lo16"; case VK_PPC_GAS_HA16: return "ha"; diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 7bbfd2efa1..e96010bd5c 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -32,10 +32,12 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << ">"; } +#ifndef NDEBUG void MCOperand::dump() const { print(dbgs(), 0); dbgs() << "\n"; } +#endif void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << "<MCInst " << getOpcode(); @@ -62,7 +64,9 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, OS << ">"; } +#ifndef NDEBUG void MCInst::dump() const { print(dbgs(), 0); dbgs() << "\n"; } +#endif diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp index 9c0fc92e6c..95d7d16a19 100644 --- a/lib/MC/MCLabel.cpp +++ b/lib/MC/MCLabel.cpp @@ -16,6 +16,8 @@ void MCLabel::print(raw_ostream &OS) const { OS << '"' << getInstance() << '"'; } +#ifndef NDEBUG void MCLabel::dump() const { print(dbgs()); } +#endif diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 6964d12267..c4d9e22280 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -320,12 +320,18 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) { MCDataFragment *DF = getOrCreateDataFragment(); - DF->addFixup(MCFixup::Create(DF->getContents().size(), - Value, - FK_GPRel_4)); + DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4)); DF->getContents().resize(DF->getContents().size() + 4, 0); } +// Associate GPRel32 fixup with data and resize data area +void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) { + MCDataFragment *DF = getOrCreateDataFragment(); + + DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, FK_GPRel_4)); + DF->getContents().resize(DF->getContents().size() + 8, 0); +} + void MCObjectStreamer::FinishImpl() { // Dump out the dwarf file & directory tables and line tables. 
const MCSymbol *LineSectionSymbol = NULL; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 6472242dbe..004742fbae 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -133,13 +133,13 @@ private: public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); - ~AsmParser(); + virtual ~AsmParser(); virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false); - void AddDirectiveHandler(MCAsmParserExtension *Object, - StringRef Directive, - DirectiveHandler Handler) { + virtual void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) { DirectiveMap[Directive] = std::make_pair(Object, Handler); } @@ -166,7 +166,7 @@ public: virtual bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()); - const AsmToken &Lex(); + virtual const AsmToken &Lex(); bool ParseExpression(const MCExpr *&Res); virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc); @@ -207,7 +207,7 @@ private: /// subsequently. void JumpToLoc(SMLoc Loc); - void EatToEndOfStatement(); + virtual void EatToEndOfStatement(); bool ParseMacroArgument(MacroArgument &MA); bool ParseMacroArguments(const Macro *M, MacroArguments &A); @@ -215,7 +215,7 @@ private: /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. - StringRef ParseStringToEndOfStatement(); + virtual StringRef ParseStringToEndOfStatement(); /// \brief Parse until the end of a statement or a comma is encountered, /// return the contents from the current token up to the end or comma. @@ -230,7 +230,7 @@ private: /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) /// and set \arg Res to the identifier contents. - bool ParseIdentifier(StringRef &Res); + virtual bool ParseIdentifier(StringRef &Res); // Directive Parsing. 
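The AsmParser churn above is all one change: the destructor, Lex, EatToEndOfStatement, ParseStringToEndOfStatement, ParseIdentifier, and AddDirectiveHandler become virtual, so a derived parser can now interpose on them through a base-class pointer instead of forking the class. A minimal sketch of why the keyword matters, using hypothetical names rather than LLVM's real interfaces:

    #include <iostream>

    // Sketch only: mirrors the shape of the AsmParser change, not its API.
    struct BaseParser {
      virtual ~BaseParser() {}  // safe to delete through a base pointer
      virtual const char *Lex() { return "base-token"; }
    };

    struct TargetParser : BaseParser {
      // Without 'virtual' in the base, this would merely hide
      // BaseParser::Lex, and callers holding a BaseParser* would bypass it.
      virtual const char *Lex() { return "target-token"; }
    };

    int main() {
      TargetParser TP;
      BaseParser *P = &TP;
      std::cout << P->Lex() << "\n"; // prints "target-token"
      return 0;
    }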
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 3a825f03b7..93ee2dd0c0 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -44,5 +44,7 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) { } void MCParsedAsmOperand::dump() const { +#ifndef NDEBUG dbgs() << " " << *this; +#endif } diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 05c83f760a..cbf853cd8e 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -70,7 +70,7 @@ uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) { } -MCSchedModel * +const MCSchedModel * MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModel && "Processor machine model not available!"); @@ -93,11 +93,11 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { return &MCSchedModel::DefaultSchedModel; } assert(Found->Value && "Missing processor SchedModel value"); - return (MCSchedModel *)Found->Value; + return (const MCSchedModel *)Found->Value; } InstrItineraryData MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { - MCSchedModel *SchedModel = getSchedModelForCPU(CPU); + const MCSchedModel *SchedModel = getSchedModelForCPU(CPU); return InstrItineraryData(SchedModel, Stages, OperandCycles, ForwardingPaths); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index f7f9184f03..f60126b8fa 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -76,6 +76,8 @@ void MCSymbol::print(raw_ostream &OS) const { OS << '"' << getName() << '"'; } +#ifndef NDEBUG void MCSymbol::dump() const { print(dbgs()); } +#endif diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index c6ea16ce7b..a37149d788 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -31,6 +31,8 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { OS << " + " << getConstant(); } +#ifndef NDEBUG void MCValue::dump() const { print(dbgs(), 0); } +#endif diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5820a224c5..c57b0d65c1 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -396,8 +396,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { continue; // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); Asm.getOrCreateSymbolData(*it->Symbol); } @@ -414,8 +413,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { continue; // Initialize the section indirect symbol base, if necessary. - if (!IndirectSymBase.count(it->SectionData)) - IndirectSymBase[it->SectionData] = IndirectIndex; + IndirectSymBase.insert(std::make_pair(it->SectionData, IndirectIndex)); // Set the symbol type to undefined lazy, but only on construction. // diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index e93b4de969..bd398ceec2 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -337,9 +337,9 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU, } /// Get scheduling itinerary of a CPU. 
-void *SubtargetFeatures::getItinerary(const StringRef CPU, - const SubtargetInfoKV *Table, - size_t TableSize) { +const void *SubtargetFeatures::getItinerary(const StringRef CPU, + const SubtargetInfoKV *Table, + size_t TableSize) { assert(Table && "missing table"); #ifndef NDEBUG for (size_t i = 1; i < TableSize; i++) { @@ -368,11 +368,13 @@ void SubtargetFeatures::print(raw_ostream &OS) const { OS << "\n"; } +#ifndef NDEBUG /// dump - Dump feature info. /// void SubtargetFeatures::dump() const { print(dbgs()); } +#endif /// getDefaultSubtargetFeatures - Return a string listing the features /// associated with the target triple. diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index ed261a4194..f143e6d0ad 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -196,8 +196,10 @@ totalExponent(StringRef::iterator p, StringRef::iterator end, assert(value < 10U && "Invalid character in exponent"); unsignedExponent = unsignedExponent * 10 + value; - if (unsignedExponent > 32767) + if (unsignedExponent > 32767) { overflow = true; + break; + } } if (exponentAdjustment > 32767 || exponentAdjustment < -32768) diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 41ddc3a3bc..d8884381ab 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -160,7 +160,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { // On linux we have a weird situation. The stderr/out/in symbols are both // macros and global variables because of standards requirements. So, we // boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. -#if defined(__linux__) +#if defined(__linux__) and !defined(__ANDROID__) { EXPLICIT_SYMBOL(stderr); EXPLICIT_SYMBOL(stdout); diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index d2cb5c969e..7610d281f0 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -49,7 +49,7 @@ LockFileManager::readLockFile(StringRef LockFileName) { } bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { -#if LLVM_ON_UNIX +#if LLVM_ON_UNIX && !defined(__ANDROID__) char MyHostname[256]; MyHostname[255] = 0; MyHostname[0] = 0; diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index c2fc261df3..9ac1f867fd 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Compiler.h" #include <cassert> using namespace llvm; @@ -69,7 +70,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return it. - if (BucketItem == 0) { + if (LLVM_LIKELY(BucketItem == 0)) { // If we found a tombstone, we want to reuse the tombstone instead of an // empty bucket. This reduces probing. if (FirstTombstone != -1) { @@ -84,7 +85,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { if (BucketItem == getTombstoneVal()) { // Skip over tombstones. However, remember the first one we see. if (FirstTombstone == -1) FirstTombstone = BucketNo; - } else if (HashTable[BucketNo] == FullHashValue) { + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { // If the full hash value matches, check deeply for a match. The common // case here is that we are only looking at the buckets (for item info // being non-null and for the full hash value) not at the items. 
This @@ -123,12 +124,12 @@ int StringMapImpl::FindKey(StringRef Key) const { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return. - if (BucketItem == 0) + if (LLVM_LIKELY(BucketItem == 0)) return -1; if (BucketItem == getTombstoneVal()) { // Ignore tombstones. - } else if (HashTable[BucketNo] == FullHashValue) { + } else if (LLVM_LIKELY(HashTable[BucketNo] == FullHashValue)) { // If the full hash value matches, check deeply for a match. The common // case here is that we are only looking at the buckets (for item info // being non-null and for the full hash value) not at the items. This diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index cca549dad5..d1dc7c81af 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -95,6 +95,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case SCEI: return "scei"; case BGP: return "bgp"; case BGQ: return "bgq"; + case Freescale: return "fsl"; } llvm_unreachable("Invalid VendorType!"); @@ -138,7 +139,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { case GNUEABI: return "gnueabi"; case EABI: return "eabi"; case MachO: return "macho"; - case ANDROIDEABI: return "androideabi"; + case Android: return "android"; } llvm_unreachable("Invalid EnvironmentType!"); @@ -269,6 +270,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("scei", Triple::SCEI) .Case("bgp", Triple::BGP) .Case("bgq", Triple::BGQ) + .Case("fsl", Triple::Freescale) .Default(Triple::UnknownVendor); } @@ -305,7 +307,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("gnueabi", Triple::GNUEABI) .StartsWith("gnu", Triple::GNU) .StartsWith("macho", Triple::MachO) - .StartsWith("androideabi", Triple::ANDROIDEABI) + .StartsWith("android", Triple::Android) .Default(Triple::UnknownEnvironment); } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 35f01802ba..704f4681bc 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -299,7 +299,7 @@ static void PrintStackTrace(void *) { #endif } -/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or +/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void llvm::sys::PrintStackTraceOnErrorSignal() { AddSignalHandler(PrintStackTrace, 0); @@ -311,10 +311,10 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { exception_mask_t mask = EXC_MASK_CRASH; - kern_return_t ret = task_set_exception_ports(self, + kern_return_t ret = task_set_exception_ports(self, mask, MACH_PORT_NULL, - EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, + EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, THREAD_STATE_NONE); (void)ret; } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index fa69c2d3f5..7cd53648da 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -266,8 +266,8 @@ void raw_ostream::flush_nonempty() { raw_ostream &raw_ostream::write(unsigned char C) { // Group exceptional cases into a single branch. 
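 // (LLVM_LIKELY and LLVM_UNLIKELY expand to __builtin_expect on compilers
 // that have it, and to the bare expression elsewhere; unlike the older
 // BUILTIN_EXPECT macro, the expected truth value is encoded in the name.)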
- if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) { - if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) { + if (LLVM_UNLIKELY(!OutBufStart)) { if (BufferMode == Unbuffered) { write_impl(reinterpret_cast<char*>(&C), 1); return *this; @@ -286,8 +286,8 @@ raw_ostream &raw_ostream::write(unsigned char C) { raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // Group exceptional cases into a single branch. - if (BUILTIN_EXPECT(size_t(OutBufEnd - OutBufCur) < Size, false)) { - if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) { + if (LLVM_UNLIKELY(!OutBufStart)) { if (BufferMode == Unbuffered) { write_impl(Ptr, Size); return *this; @@ -302,7 +302,7 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { // If the buffer is empty at this point we have a string that is larger // than the buffer. Directly write the chunk that is a multiple of the // preferred buffer size and put the remainder in the buffer. - if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) { + if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { size_t BytesToWrite = Size - (Size % NumBytes); write_impl(Ptr, BytesToWrite); copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); @@ -523,7 +523,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { ssize_t ret; // Check whether we should attempt to use atomic writes. - if (BUILTIN_EXPECT(!UseAtomicWrites, true)) { + if (LLVM_LIKELY(!UseAtomicWrites)) { ret = ::write(FD, Ptr, Size); } else { // Use ::writev() where available. diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c index 007861675b..bd5e72d4c5 100644 --- a/lib/Support/regexec.c +++ b/lib/Support/regexec.c @@ -69,7 +69,7 @@ #define SETUP(v) ((v) = 0) #define onestate long #define INIT(o, n) ((o) = (unsigned long)1 << (n)) -#define INC(o) ((o) <<= 1) +#define INC(o) ((o) = (unsigned long)(o) << 1) #define ISSTATEIN(v, o) (((v) & (o)) != 0) /* some abbreviations; note that some of these know variable names! 
*/ /* do "if I'm here, I can also be there" etc without branches */ diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 1463b68144..5dd688cb67 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -20,8 +20,19 @@ namespace llvm { SourceMgr SrcMgr; -void PrintWarning(SMLoc WarningLoc, const Twine &Msg) { - SrcMgr.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind, + const Twine &Msg) { + SMLoc NullLoc; + if (Loc.empty()) + Loc = NullLoc; + SrcMgr.PrintMessage(Loc.front(), Kind, Msg); + for (unsigned i = 1; i < Loc.size(); ++i) + SrcMgr.PrintMessage(Loc[i], SourceMgr::DK_Note, + "instantiated from multiclass"); +} + +void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg) { + PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); } void PrintWarning(const char *Loc, const Twine &Msg) { @@ -36,8 +47,8 @@ void PrintWarning(const TGError &Warning) { PrintWarning(Warning.getLoc(), Warning.getMessage()); } -void PrintError(SMLoc ErrorLoc, const Twine &Msg) { - SrcMgr.PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); +void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { + PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } void PrintError(const char *Loc, const Twine &Msg) { diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 99fdc1f6e9..b2a7b628e4 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -112,7 +112,10 @@ Init *BitRecTy::convertValue(IntInit *II) { } Init *BitRecTy::convertValue(TypedInit *VI) { - if (dynamic_cast<BitRecTy*>(VI->getType())) + RecTy *Ty = VI->getType(); + if (dynamic_cast<BitRecTy*>(Ty) || + dynamic_cast<BitsRecTy*>(Ty) || + dynamic_cast<IntRecTy*>(Ty)) return VI; // Accept variable if it is already of bit type! return 0; } @@ -178,60 +181,15 @@ Init *BitsRecTy::convertValue(BitsInit *BI) { } Init *BitsRecTy::convertValue(TypedInit *VI) { - if (BitsRecTy *BRT = dynamic_cast<BitsRecTy*>(VI->getType())) - if (BRT->Size == Size) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = VarBitInit::get(VI, i); - return BitsInit::get(NewBits); - } - if (Size == 1 && dynamic_cast<BitRecTy*>(VI->getType())) return BitsInit::get(VI); - if (TernOpInit *Tern = dynamic_cast<TernOpInit*>(VI)) { - if (Tern->getOpcode() == TernOpInit::IF) { - Init *LHS = Tern->getLHS(); - Init *MHS = Tern->getMHS(); - Init *RHS = Tern->getRHS(); - - IntInit *MHSi = dynamic_cast<IntInit*>(MHS); - IntInit *RHSi = dynamic_cast<IntInit*>(RHS); - - if (MHSi && RHSi) { - int64_t MHSVal = MHSi->getValue(); - int64_t RHSVal = RHSi->getValue(); + if (VI->getType()->typeIsConvertibleTo(this)) { + SmallVector<Init *, 16> NewBits(Size); - if (canFitInBitfield(MHSVal, Size) && canFitInBitfield(RHSVal, Size)) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = - TernOpInit::get(TernOpInit::IF, LHS, - IntInit::get((MHSVal & (1LL << i)) ? 1 : 0), - IntInit::get((RHSVal & (1LL << i)) ? 
1 : 0), - VI->getType()); - - return BitsInit::get(NewBits); - } - } else { - BitsInit *MHSbs = dynamic_cast<BitsInit*>(MHS); - BitsInit *RHSbs = dynamic_cast<BitsInit*>(RHS); - - if (MHSbs && RHSbs) { - SmallVector<Init *, 16> NewBits(Size); - - for (unsigned i = 0; i != Size; ++i) - NewBits[i] = TernOpInit::get(TernOpInit::IF, LHS, - MHSbs->getBit(i), - RHSbs->getBit(i), - VI->getType()); - - return BitsInit::get(NewBits); - } - } - } + for (unsigned i = 0; i != Size; ++i) + NewBits[i] = VarBitInit::get(VI, i); + return BitsInit::get(NewBits); } return 0; @@ -519,6 +477,15 @@ std::string BitsInit::getAsString() const { return Result + " }"; } +// Fix bit initializer to preserve the behavior that bit reference from a unset +// bits initializer will resolve into VarBitInit to keep the field name and bit +// number used in targets with fixed insn length. +static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) { + if (RV || After != UnsetInit::get()) + return After; + return Before; +} + // resolveReferences - If there are any field references that refer to fields // that have been filled in, we can propagate the values now. // @@ -526,16 +493,39 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector<Init *, 16> NewBits(getNumBits()); - for (unsigned i = 0, e = Bits.size(); i != e; ++i) { - Init *B; - Init *CurBit = getBit(i); + Init *CachedInit = 0; + Init *CachedBitVar = 0; + bool CachedBitVarChanged = false; + + for (unsigned i = 0, e = getNumBits(); i != e; ++i) { + Init *CurBit = Bits[i]; + Init *CurBitVar = CurBit->getBitVar(); - do { - B = CurBit; - CurBit = CurBit->resolveReferences(R, RV); - Changed |= B != CurBit; - } while (B != CurBit); NewBits[i] = CurBit; + + if (CurBitVar == CachedBitVar) { + if (CachedBitVarChanged) { + Init *Bit = CachedInit->getBit(CurBit->getBitNum()); + NewBits[i] = fixBitInit(RV, CurBit, Bit); + } + continue; + } + CachedBitVar = CurBitVar; + CachedBitVarChanged = false; + + Init *B; + do { + B = CurBitVar; + CurBitVar = CurBitVar->resolveReferences(R, RV); + CachedBitVarChanged |= B != CurBitVar; + Changed |= B != CurBitVar; + } while (B != CurBitVar); + CachedInit = CurBitVar; + + if (CachedBitVarChanged) { + Init *Bit = CurBitVar->getBit(CurBit->getBitNum()); + NewBits[i] = fixBitInit(RV, CurBit, Bit); + } } if (Changed) @@ -682,20 +672,6 @@ std::string ListInit::getAsString() const { return Result + "]"; } -Init *OpInit::resolveBitReference(Record &R, const RecordVal *IRV, - unsigned Bit) const { - Init *Folded = Fold(&R, 0); - - if (Folded != this) { - TypedInit *Typed = dynamic_cast<TypedInit *>(Folded); - if (Typed) { - return Typed->resolveBitReference(R, IRV, Bit); - } - } - - return 0; -} - Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { Init *Resolved = resolveReferences(R, IRV); @@ -718,6 +694,12 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, return 0; } +Init *OpInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<OpInit*>(this); + return VarBitInit::get(const_cast<OpInit*>(this), Bit); +} + UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) { typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key; @@ -922,9 +904,9 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { case EQ: { // try to fold eq comparison for 'bit' and 'int', otherwise fallback // to string objects. 
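    // (If an operand has no integer interpretation, the dynamic_cast below
    // yields a null pointer and the comparison falls through to the
    // string path.)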
- IntInit* L = + IntInit *L = dynamic_cast<IntInit*>(LHS->convertInitializerTo(IntRecTy::get())); - IntInit* R = + IntInit *R = dynamic_cast<IntInit*>(RHS->convertInitializerTo(IntRecTy::get())); if (L && R) @@ -1324,25 +1306,10 @@ const std::string &VarInit::getName() const { return NameString->getValue(); } -Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV, - unsigned Bit) const { - if (R.isTemplateArg(getNameInit())) return 0; - if (IRV && IRV->getNameInit() != getNameInit()) return 0; - - RecordVal *RV = R.getValue(getNameInit()); - assert(RV && "Reference to a non-existent variable?"); - assert(dynamic_cast<BitsInit*>(RV->getValue())); - BitsInit *BI = (BitsInit*)RV->getValue(); - - assert(Bit < BI->getNumBits() && "Bit reference out of range!"); - Init *B = BI->getBit(Bit); - - // If the bit is set to some value, or if we are resolving a reference to a - // specific variable and that variable is explicitly unset, then replace the - // VarBitInit with it. - if (IRV || !dynamic_cast<UnsetInit*>(B)) - return B; - return 0; +Init *VarInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<VarInit*>(this); + return VarBitInit::get(const_cast<VarInit*>(this), Bit); } Init *VarInit::resolveListElementReference(Record &R, @@ -1425,9 +1392,11 @@ std::string VarBitInit::getAsString() const { } Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { - if (Init *I = getVariable()->resolveBitReference(R, RV, getBitNum())) - return I; - return const_cast<VarBitInit *>(this); + Init *I = TI->resolveReferences(R, RV); + if (TI != I) + return I->getBit(getBitNum()); + + return const_cast<VarBitInit*>(this); } VarListElementInit *VarListElementInit::get(TypedInit *T, @@ -1456,11 +1425,10 @@ VarListElementInit::resolveReferences(Record &R, const RecordVal *RV) const { return const_cast<VarListElementInit *>(this); } -Init *VarListElementInit::resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - // FIXME: This should be implemented, to support references like: - // bit B = AA[0]{1}; - return 0; +Init *VarListElementInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<VarListElementInit*>(this); + return VarBitInit::get(const_cast<VarListElementInit*>(this), Bit); } Init *VarListElementInit:: resolveListElementReference(Record &R, @@ -1513,17 +1481,10 @@ FieldInit *FieldInit::get(Init *R, const std::string &FN) { return I; } -Init *FieldInit::resolveBitReference(Record &R, const RecordVal *RV, - unsigned Bit) const { - if (Init *BitsVal = Rec->getFieldInit(R, RV, FieldName)) - if (BitsInit *BI = dynamic_cast<BitsInit*>(BitsVal)) { - assert(Bit < BI->getNumBits() && "Bit reference out of range!"); - Init *B = BI->getBit(Bit); - - if (dynamic_cast<BitInit*>(B)) // If the bit is set. - return B; // Replace the VarBitInit with it. 
- } - return 0; +Init *FieldInit::getBit(unsigned Bit) const { + if (getType() == BitRecTy::get()) + return const_cast<FieldInit*>(this); + return VarBitInit::get(const_cast<FieldInit*>(this), Bit); } Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, @@ -1751,7 +1712,15 @@ void Record::resolveReferencesTo(const RecordVal *RV) { if (RV == &Values[i]) // Skip resolve the same field as the given one continue; if (Init *V = Values[i].getValue()) - Values[i].setValue(V->resolveReferences(*this, RV)); + if (Values[i].setValue(V->resolveReferences(*this, RV))) + throw TGError(getLoc(), "Invalid value is found when setting '" + + Values[i].getNameInitAsString() + + "' after resolving references" + + (RV ? " against '" + RV->getNameInitAsString() + + "' of (" + + RV->getValue()->getAsUnquotedString() + ")" + : "") + + "\n"); } Init *OldName = getNameInit(); Init *NewName = Name->resolveReferences(*this, RV); @@ -1963,6 +1932,23 @@ bool Record::getValueAsBit(StringRef FieldName) const { "' does not have a bit initializer!"; } +bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { + const RecordVal *R = getValue(FieldName); + if (R == 0 || R->getValue() == 0) + throw "Record `" + getName() + "' does not have a field named `" + + FieldName.str() + "'!\n"; + + if (R->getValue() == UnsetInit::get()) { + Unset = true; + return false; + } + Unset = false; + if (BitInit *BI = dynamic_cast<BitInit*>(R->getValue())) + return BI->getValue(); + throw "Record `" + getName() + "', field `" + FieldName.str() + + "' does not have a bit initializer!"; +} + /// getValueAsDag - This method looks up the specified field and returns its /// value as an Dag, throwing an exception if the field does not exist or if /// the value is not the right type. diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index b9c7ff694d..aee93e7696 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -1044,35 +1044,28 @@ Init *TGParser::ParseOperation(Record *CurRec) { switch (LexCode) { default: llvm_unreachable("Unhandled code!"); case tgtok::XIf: { - // FIXME: The `!if' operator doesn't handle non-TypedInit well at - // all. This can be made much more robust. - TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS); - TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS); - RecTy *MHSTy = 0; RecTy *RHSTy = 0; - if (MHSt == 0 && RHSt == 0) { - BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS); - BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS); - - if (MHSbits && RHSbits && - MHSbits->getNumBits() == RHSbits->getNumBits()) { - Type = BitRecTy::get(); - break; - } else { - BitInit *MHSbit = dynamic_cast<BitInit*>(MHS); - BitInit *RHSbit = dynamic_cast<BitInit*>(RHS); - - if (MHSbit && RHSbit) { - Type = BitRecTy::get(); - break; - } - } - } else if (MHSt != 0 && RHSt != 0) { + if (TypedInit *MHSt = dynamic_cast<TypedInit*>(MHS)) MHSTy = MHSt->getType(); + if (BitsInit *MHSbits = dynamic_cast<BitsInit*>(MHS)) + MHSTy = BitsRecTy::get(MHSbits->getNumBits()); + if (dynamic_cast<BitInit*>(MHS)) + MHSTy = BitRecTy::get(); + + if (TypedInit *RHSt = dynamic_cast<TypedInit*>(RHS)) RHSTy = RHSt->getType(); - } + if (BitsInit *RHSbits = dynamic_cast<BitsInit*>(RHS)) + RHSTy = BitsRecTy::get(RHSbits->getNumBits()); + if (dynamic_cast<BitInit*>(RHS)) + RHSTy = BitRecTy::get(); + + // For UnsetInit, it's typed from the other hand. 
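+      // (That is, an unset operand inherits the type of the opposite arm,
+      // so '!if(cond, SomeTypedValue, ?)' still type-checks.)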
+ if (dynamic_cast<UnsetInit*>(MHS)) + MHSTy = RHSTy; + if (dynamic_cast<UnsetInit*>(RHS)) + RHSTy = MHSTy; if (!MHSTy || !RHSTy) { TokError("could not get type for !if"); @@ -2277,7 +2270,10 @@ InstantiateMulticlassDef(MultiClass &MC, DefName, StringRecTy::get())->Fold(DefProto, &MC); } - Record *CurRec = new Record(DefName, DefmPrefixLoc, Records); + // Make a trail of SMLocs from the multiclass instantiations. + SmallVector<SMLoc, 4> Locs(1, DefmPrefixLoc); + Locs.append(DefProto->getLoc().begin(), DefProto->getLoc().end()); + Record *CurRec = new Record(DefName, Locs, Records); SubClassReference Ref; Ref.RefLoc = DefmPrefixLoc; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index aab8825444..acd2a03354 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -683,7 +683,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Handle register classes that require multiple instructions. unsigned BeginIdx = 0; unsigned SubRegs = 0; - unsigned Spacing = 1; + int Spacing = 1; // Use VORRq when possible. if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) @@ -705,27 +705,38 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; - if (Opc) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstrBuilder Mov; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); - assert(Dst && Src && "Bad sub-register"); - Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src)); - // VORR takes two source operands. - if (Opc == ARM::VORRq) - Mov.addReg(Src); - } - // Add implicit super-register defs and kills to the last instruction. - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - return; - } + assert(Opc && "Impossible reg-to-reg copy"); - llvm_unreachable("Impossible reg-to-reg copy"); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstrBuilder Mov; + + // Copy register tuples backward when the first Dest reg overlaps with SrcReg. + if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { + BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + Spacing = -Spacing; + } +#ifndef NDEBUG + SmallSet<unsigned, 4> DstRegs; +#endif + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); +#ifndef NDEBUG + assert(!DstRegs.count(Src) && "destructive vector copy"); + DstRegs.insert(Dst); +#endif + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); + Mov = AddDefaultPred(Mov); + } + // Add implicit super-register defs and kills to the last instruction. + Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); } static const @@ -1569,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. 
-static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. + if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { @@ -1588,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1665,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) 
+ DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1696,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -3368,7 +3354,8 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { // converted. if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || - MI->getOpcode() == ARM::VMOVSR)) + MI->getOpcode() == ARM::VMOVSR || + MI->getOpcode() == ARM::VMOVS)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); // No other instructions can be swizzled, so just determine their domain. @@ -3388,13 +3375,28 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { return std::make_pair(ExeGeneric, 0); } +static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, + unsigned SReg, unsigned &Lane) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + + if (DReg != ARM::NoRegister) + return DReg; + + Lane = 1; + DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass); + + assert(DReg && "S-register with no D super-register?"); + return DReg; +} + + void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; MachineInstrBuilder MIB(MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); - bool isKill; switch (MI->getOpcode()) { default: llvm_unreachable("cannot handle opcode!"); @@ -3405,78 +3407,175 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // Zap the predicate operands. assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - // Add the extra source operand and new predicates. - // This will go before any implicit ops. 
- AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VORRd)); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(SrcReg) + .addReg(SrcReg)); break; case ARM::VMOVRS: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); + DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane); + // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) + // Note that DSrc has been widened and the other lane may be undef, which + // contaminates the entire register. MI->setDesc(get(ARM::VGETLNi32)); - MIB.addReg(DReg); - MIB.addImm(Lane); + AddDefaultPred(MIB.addReg(DstReg, RegState::Define) + .addReg(DReg, RegState::Undef) + .addImm(Lane)); - MIB->getOperand(1).setIsUndef(); + // The old source should be an implicit use, otherwise we might think it + // was dead before here. MIB.addReg(SrcReg, RegState::Implicit); - - AddDefaultPred(MIB); break; case ARM::VMOVSR: if (Domain != ExeNEON) break; assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) DstReg = MI->getOperand(0).getReg(); SrcReg = MI->getOperand(1).getReg(); - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); - Lane = 0; - if (DReg == ARM::NoRegister) { - DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); - Lane = 1; - assert(DReg && "S-register with no D super-register?"); - } - isKill = MI->getOperand(0).isKill(); - MI->RemoveOperand(3); - MI->RemoveOperand(2); - MI->RemoveOperand(1); - MI->RemoveOperand(0); + DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane); + + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DReg, TRI) && !MI->readsRegister(DReg, TRI)) + break; + + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) + // Again DDst may be undefined at the beginning of this instruction. MI->setDesc(get(ARM::VSETLNi32)); - MIB.addReg(DReg, RegState::Define); - MIB.addReg(DReg, RegState::Undef); - MIB.addReg(SrcReg); - MIB.addImm(Lane); + MIB.addReg(DReg, RegState::Define) + .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI))) + .addReg(SrcReg) + .addImm(Lane); + AddDefaultPred(MIB); + + // The narrower destination must be marked as set to keep previous chains + // in place. 
+ MIB.addReg(DstReg, RegState::Define | RegState::Implicit); + break; + case ARM::VMOVS: { + if (Domain != ExeNEON) + break; + + // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; + DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane); + DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane); + + // If we insert both a novel <def> and an <undef> on the DReg, we break + // any existing dependency chain on the unused lane. Either already being + // present means this instruction is in that chain anyway so we can make + // the transformation. + if (!MI->definesRegister(DDst, TRI) && !MI->readsRegister(DDst, TRI)) + break; - if (isKill) - MIB->addRegisterKilled(DstReg, TRI, true); - MIB->addRegisterDefined(DstReg, TRI); + for (unsigned i = MI->getDesc().getNumOperands(); i; --i) + MI->RemoveOperand(i-1); + + if (DSrc == DDst) { + // Destination can be: + // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) + MI->setDesc(get(ARM::VDUPLN32d)); + MIB.addReg(DDst, RegState::Define) + .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI))) + .addImm(SrcLane); + AddDefaultPred(MIB); + + // Neither the source or the destination are naturally represented any + // more, so add them in manually. + MIB.addReg(DstReg, RegState::Implicit | RegState::Define); + MIB.addReg(SrcReg, RegState::Implicit); + break; + } + // In general there's no single instruction that can perform an S <-> S + // move in NEON space, but a pair of VEXT instructions *can* do the + // job. It turns out that the VEXTs needed will only use DSrc once, with + // the position based purely on the combination of lane-0 and lane-1 + // involved. For example + // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 + // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 + // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 + // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 + // + // Pattern of the MachineInstrs is: + // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) + MachineInstrBuilder NewMIB; + NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::VEXTd32), DDst); + + // On the first instruction, both DSrc and DDst may be <undef> if present. + // Specifically when the original instruction didn't have them as an + // <imp-use>. + unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; + bool CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; + CurUndef = !MI->readsRegister(CurReg, TRI); + NewMIB.addReg(CurReg, getUndefRegState(CurUndef)); + + NewMIB.addImm(1); + AddDefaultPred(NewMIB); + + if (SrcLane == DstLane) + NewMIB.addReg(SrcReg, RegState::Implicit); + + MI->setDesc(get(ARM::VEXTd32)); + MIB.addReg(DDst, RegState::Define); + + // On the second instruction, DDst has definitely been defined above, so + // it is not <undef>. DSrc, if present, can be <undef> as above. + CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; + CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI); + MIB.addReg(CurReg, getUndefRegState(CurUndef)); + + CurReg = SrcLane == 0 && DstLane == 1 ? 
DSrc : DDst;
+    CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
+    MIB.addReg(CurReg, getUndefRegState(CurUndef));
+
+    MIB.addImm(1);
     AddDefaultPred(MIB);
+
+    if (SrcLane != DstLane)
+      MIB.addReg(SrcReg, RegState::Implicit);
+
+    // As before, the original destination is no longer represented, add it
+    // implicitly.
+    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
     break;
+  }
   }
 }
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index e81b4cc282..68406db843 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -389,7 +389,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
 
   do {
     DEBUG(errs() << "JITTing function '"
-          << MF.getFunction()->getName() << "'\n");
+          << MF.getName() << "'\n");
     MCE.startFunction(MF);
     for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
          MBB != E; ++MBB) {
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 8135d52f7c..2c4b9f7cb2 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1389,10 +1389,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
   // If the original WaterList entry was "new water" on this iteration,
   // propagate that to the new island. This is just keeping NewWaterList
   // updated to match the WaterList, which will be updated below.
-  if (NewWaterList.count(WaterBB)) {
-    NewWaterList.erase(WaterBB);
+  if (NewWaterList.erase(WaterBB))
     NewWaterList.insert(NewIsland);
-  }
+
   // The new CPE goes before the following block (NewMBB).
   NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index e03ad8de0d..c292821e79 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1305,6 +1305,57 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       ExpandLaneOp(MBBI);
       return true;
 
+    case ARM::VSETLNi8Q:
+    case ARM::VSETLNi16Q: {
+      // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
+      // on the respective D register.
+
+      unsigned QReg = MI.getOperand(1).getReg();
+      unsigned QLane = MI.getOperand(3).getImm();
+
+      unsigned NewOpcode, DLane, DSubReg;
+      switch (Opcode) {
+      default: llvm_unreachable("Invalid opcode!");
+      case ARM::VSETLNi8Q:
+        // 8 possible 8-bit lanes per DPR.
+        NewOpcode = ARM::VSETLNi8;
+        DLane = QLane % 8;
+        DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
+        break;
+      case ARM::VSETLNi16Q:
+        // 4 possible 16-bit lanes per DPR.
+        NewOpcode = ARM::VSETLNi16;
+        DLane = QLane % 4;
+        DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
+        break;
+      }
+
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
+
+      unsigned DReg = TRI->getSubReg(QReg, DSubReg);
+
+      MIB.addReg(DReg, RegState::Define); // Output DPR
+      MIB.addReg(DReg);                   // Input DPR
+      MIB.addOperand(MI.getOperand(2));   // Input GPR
+      MIB.addImm(DLane);                  // Lane
+
+      // Add the predicate operands.
+      MIB.addOperand(MI.getOperand(4));
+      MIB.addOperand(MI.getOperand(5));
+
+      if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
+        MIB->addRegisterKilled(QReg, TRI, true);
+      // And an implicit def of the output register (which should always be the
+      // same as the input register).
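+      // (The expansion writes only one D sub-register, so the other half of
+      // QReg flows through unchanged; the kill above plus the def below keep
+      // the Q register's liveness visible to later passes.)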
+ MIB->addRegisterDefined(QReg, TRI); + + TransferImpOps(MI, MIB, MIB); + + MI.eraseFromParent(); + return true; + } + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4848110bd7..873404effd 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -617,10 +617,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (VT != MVT::i32) return 0; Reloc::Model RelocM = TM.getRelocationModel(); - - // TODO: Need more magic for ARM PIC. - if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - + bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); // Use movw+movt when possible, it avoids constant pool entries. @@ -668,17 +665,30 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { .addConstantPoolIndex(Idx); if (RelocM == Reloc::PIC_) MIB.addImm(Id); + AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) .addImm(0); + AddOptionalDefs(MIB); + + if (RelocM == Reloc::PIC_) { + unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD; + unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), NewDestReg) + .addReg(DestReg) + .addImm(Id); + AddOptionalDefs(MIB); + return NewDestReg; + } } - AddOptionalDefs(MIB); } - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + if (IsIndirect) { MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) @@ -2217,25 +2227,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if (isThumb2) { - // Explicitly adding the predicate here. + // BL / BLX don't take a predicate, but tBL / tBLX do. + if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); - } else { - if (EnableARMLongCalls) - MIB.addReg(CalleeReg); - else - MIB.addExternalSymbol(TLI.getLibcallName(Call)); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); - // Explicitly adding the predicate here. - AddDefaultPred(MIB); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); + MIB.addReg(RegArgs[i], RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). @@ -2363,30 +2365,20 @@ bool ARMFastISel::SelectCall(const Instruction *I, unsigned CallOpc = ARMSelectCallOp(UseReg); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); - if(isThumb2) { - // Explicitly adding the predicate here. 
-    AddDefaultPred(MIB);
-    if (UseReg)
-      MIB.addReg(CalleeReg);
-    else if (!IntrMemName)
-      MIB.addGlobalAddress(GV, 0, 0);
-    else
-      MIB.addExternalSymbol(IntrMemName, 0);
-  } else {
-    if (UseReg)
-      MIB.addReg(CalleeReg);
-    else if (!IntrMemName)
-      MIB.addGlobalAddress(GV, 0, 0);
-    else
-      MIB.addExternalSymbol(IntrMemName, 0);
-    // Explicitly adding the predicate here.
+  // ARM calls don't take a predicate, but tBL / tBLX do.
+  if (isThumb2)
     AddDefaultPred(MIB);
-  }
+  if (UseReg)
+    MIB.addReg(CalleeReg);
+  else if (!IntrMemName)
+    MIB.addGlobalAddress(GV, 0, 0);
+  else
+    MIB.addExternalSymbol(IntrMemName, 0);
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
-    MIB.addReg(RegArgs[i]);
+    MIB.addReg(RegArgs[i], RegState::Implicit);
 
   // Add a register mask with the call-preserved registers.
   // Proper defs for return values will be added by setPhysRegsDeadExcept().
@@ -2655,7 +2647,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
   unsigned Reg1 = getRegForValue(Src1Value);
   if (Reg1 == 0) return false;
 
-  unsigned Reg2;
+  unsigned Reg2 = 0;
   if (Opc == ARM::MOVsr) {
     Reg2 = getRegForValue(Src2Value);
     if (Reg2 == 0) return false;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bf66cc5085..3042b07920 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2738,6 +2738,38 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                     dl, MVT::i32, MVT::i32, Ops, 5);
     }
   }
+  case ARMISD::UMLAL: {
+    if (Subtarget->isThumb()) {
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), getAL(CurDAG),
+                        CurDAG->getRegister(0, MVT::i32)};
+      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+    } else {
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), getAL(CurDAG),
+                        CurDAG->getRegister(0, MVT::i32),
+                        CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+                                      ARM::UMLAL : ARM::UMLALv5,
+                                    dl, MVT::i32, MVT::i32, Ops, 7);
+    }
+  }
+  case ARMISD::SMLAL: {
+    if (Subtarget->isThumb()) {
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), getAL(CurDAG),
+                        CurDAG->getRegister(0, MVT::i32)};
+      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+    } else {
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), getAL(CurDAG),
+                        CurDAG->getRegister(0, MVT::i32),
+                        CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
+                                      ARM::SMLAL : ARM::SMLALv5,
+                                    dl, MVT::i32, MVT::i32, Ops, 7);
+    }
+  }
   case ISD::LOAD: {
     SDNode *ResNode = 0;
     if (Subtarget->isThumb() && Subtarget->hasThumb2())
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b8cbc9c980..ca0fa57b86 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -576,6 +576,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     }
   }
 
+  // ARM and Thumb2 support UMLAL/SMLAL.
+  if (!Subtarget->isThumb1Only())
+    setTargetDAGCombine(ISD::ADDC);
+
+
   computeRegisterProperties();
 
   // ARM does not have f32 extending load.
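The ISD::ADDC combine registered above targets 64-bit multiply-accumulate chains. As a rough illustration (this function is hypothetical, not taken from the patch or its tests), code like

    unsigned long long mac64(unsigned long long acc, unsigned a, unsigned b) {
      // A 32x32->64 multiply plus a 64-bit accumulate: after legalization this
      // becomes UMUL_LOHI feeding an ADDC/ADDE pair, which the combine added
      // later in this patch (AddCombineTo64bitMLAL) turns into a single
      // ARMISD::UMLAL node, i.e. one UMLAL instruction.
      return acc + (unsigned long long)a * b;
    }

is the kind of source that should now compile to UMLAL rather than UMULL followed by ADDS/ADC.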
@@ -1021,6 +1026,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
+  case ARMISD::UMLAL:         return "ARMISD::UMLAL";
+  case ARMISD::SMLAL:         return "ARMISD::SMLAL";
   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   case ARMISD::FMAX:          return "ARMISD::FMAX";
   case ARMISD::FMIN:          return "ARMISD::FMIN";
@@ -4383,10 +4390,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   }
 
   // Scan through the operands to see if only one value is used.
+  //
+  // As an optimisation, even if more than one value is used, it may be more
+  // profitable to splat one value and then change some lanes.
+  //
+  // Heuristically we decide to do this if the vector has a "dominant" value,
+  // defined as one splatted to more than half of the lanes.
   unsigned NumElts = VT.getVectorNumElements();
   bool isOnlyLowElement = true;
   bool usesOnlyOneValue = true;
+  bool hasDominantValue = false;
   bool isConstant = true;
+
+  // Map of the number of times a particular SDValue appears in the
+  // element list.
+  DenseMap<SDValue, unsigned> ValueCounts;
   SDValue Value;
   for (unsigned i = 0; i < NumElts; ++i) {
     SDValue V = Op.getOperand(i);
@@ -4397,13 +4415,21 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
       isConstant = false;
 
-    if (!Value.getNode())
+    ValueCounts.insert(std::make_pair(V, 0));
+    unsigned &Count = ValueCounts[V];
+
+    // Is this value dominant? (takes up more than half of the lanes)
+    if (++Count > (NumElts / 2)) {
+      hasDominantValue = true;
       Value = V;
-    else if (V != Value)
-      usesOnlyOneValue = false;
+    }
   }
+  if (ValueCounts.size() != 1)
+    usesOnlyOneValue = false;
+  if (!Value.getNode() && !ValueCounts.empty())
+    Value = ValueCounts.begin()->first;
 
-  if (!Value.getNode())
+  if (ValueCounts.empty())
     return DAG.getUNDEF(VT);
 
   if (isOnlyLowElement)
@@ -4413,9 +4439,34 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 
   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
   // i32 and try again.
-  if (usesOnlyOneValue && EltSize <= 32) {
-    if (!isConstant)
-      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+  if (hasDominantValue && EltSize <= 32) {
+    if (!isConstant) {
+      SDValue N;
+
+      // If we are VDUPing a value that comes directly from a vector, that will
+      // cause an unnecessary move to and from a GPR, when we could instead
+      // just use VDUPLANE.
+      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
+        N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+                        Value->getOperand(0), Value->getOperand(1));
+      else
+        N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+      if (!usesOnlyOneValue) {
+        // The dominant value was splatted as 'N', but we now have to insert
+        // all differing elements.
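+        // For example, a v4i32 (X, X, X, Y) becomes a VDUP of X followed by
+        // a single INSERT_VECTOR_ELT of Y into lane 3.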
+        for (unsigned I = 0; I < NumElts; ++I) {
+          if (Op.getOperand(I) == Value)
+            continue;
+          SmallVector<SDValue, 3> Ops;
+          Ops.push_back(N);
+          Ops.push_back(Op.getOperand(I));
+          Ops.push_back(DAG.getConstant(I, MVT::i32));
+          N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+        }
+      }
+      return N;
+    }
     if (VT.getVectorElementType().isFloatingPoint()) {
       SmallVector<SDValue, 8> Ops;
       for (unsigned i = 0; i < NumElts; ++i)
@@ -4427,9 +4478,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       if (Val.getNode())
         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
     }
-    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
-    if (Val.getNode())
-      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+    if (usesOnlyOneValue) {
+      SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+      if (isConstant && Val.getNode())
+        return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+    }
   }
 
   // If all elements are constants and the case above didn't get hit, fall back
@@ -5697,7 +5750,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   const TargetRegisterClass *TRC = isThumb2 ?
-    (const TargetRegisterClass*)&ARM::tGPRRegClass :
+    (const TargetRegisterClass*)&ARM::rGPRRegClass :
     (const TargetRegisterClass*)&ARM::GPRRegClass;
   unsigned scratch = MRI.createVirtualRegister(TRC);
   unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5808,7 +5861,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   const TargetRegisterClass *TRC = isThumb2 ?
-    (const TargetRegisterClass*)&ARM::tGPRRegClass :
+    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
 
   unsigned scratch = MRI.createVirtualRegister(TRC);
   unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -7476,6 +7529,154 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
   return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
 }
 
+static SDValue findMUL_LOHI(SDValue V) {
+  if (V->getOpcode() == ISD::UMUL_LOHI ||
+      V->getOpcode() == ISD::SMUL_LOHI)
+    return V;
+  return SDValue();
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const ARMSubtarget *Subtarget) {
+
+  if (Subtarget->isThumb1Only()) return SDValue();
+
+  // Only perform the checks after legalize when the pattern is available.
+  if (DCI.isBeforeLegalize()) return SDValue();
+
+  // Look for multiply-add opportunities.
+  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
+  // each add node consumes a value from ISD::UMUL_LOHI and there is
+  // a glue link from the first add to the second add.
+  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE
+  // with a single S/UMLAL instruction.
+  //     loAdd   UMUL_LOHI
+  //      \    / :lo    \ :hi
+  //       \  /          \          [no multiline comment]
+  //        ADDC         |  hiAdd
+  //         \ :glue    /  /
+  //          \        /  /
+  //            ADDE
+  //
+  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+  SDValue AddcOp0 = AddcNode->getOperand(0);
+  SDValue AddcOp1 = AddcNode->getOperand(1);
+
+  // Check if the two operands are from the same mul_lohi node.
+  if (AddcOp0.getNode() == AddcOp1.getNode())
+    return SDValue();
+
+  assert(AddcNode->getNumValues() == 2 &&
+         AddcNode->getValueType(0) == MVT::i32 &&
+         AddcNode->getValueType(1) == MVT::Glue &&
+         "Expect ADDC with two result values: i32, glue");
+
+  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
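+  // (That is, at least one operand of the ADDC must itself be the multiply
+  // node.)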
+  if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
+      AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
+      AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
+      AddcOp1->getOpcode() != ISD::SMUL_LOHI)
+    return SDValue();
+
+  // Look for the glued ADDE.
+  SDNode *AddeNode = AddcNode->getGluedUser();
+  if (AddeNode == NULL)
+    return SDValue();
+
+  // Make sure it is really an ADDE.
+  if (AddeNode->getOpcode() != ISD::ADDE)
+    return SDValue();
+
+  assert(AddeNode->getNumOperands() == 3 &&
+         AddeNode->getOperand(2).getValueType() == MVT::Glue &&
+         "ADDE node has the wrong inputs");
+
+  // Check for the triangle shape.
+  SDValue AddeOp0 = AddeNode->getOperand(0);
+  SDValue AddeOp1 = AddeNode->getOperand(1);
+
+  // Make sure that the ADDE operands are not coming from the same node.
+  if (AddeOp0.getNode() == AddeOp1.getNode())
+    return SDValue();
+
+  // Find the MUL_LOHI node walking up ADDE's operands.
+  bool IsLeftOperandMUL = false;
+  SDValue MULOp = findMUL_LOHI(AddeOp0);
+  if (MULOp == SDValue())
+    MULOp = findMUL_LOHI(AddeOp1);
+  else
+    IsLeftOperandMUL = true;
+  if (MULOp == SDValue())
+    return SDValue();
+
+  // Figure out the right opcode.
+  unsigned Opc = MULOp->getOpcode();
+  unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
+
+  // Figure out the high and low input values to the MLAL node.
+  SDValue *HiMul = &MULOp;
+  SDValue *HiAdd = NULL;
+  SDValue *LoMul = NULL;
+  SDValue *LoAdd = NULL;
+
+  if (IsLeftOperandMUL)
+    HiAdd = &AddeOp1;
+  else
+    HiAdd = &AddeOp0;
+
+
+  if (AddcOp0->getOpcode() == Opc) {
+    LoMul = &AddcOp0;
+    LoAdd = &AddcOp1;
+  }
+  if (AddcOp1->getOpcode() == Opc) {
+    LoMul = &AddcOp1;
+    LoAdd = &AddcOp0;
+  }
+
+  if (LoMul == NULL)
+    return SDValue();
+
+  if (LoMul->getNode() != HiMul->getNode())
+    return SDValue();
+
+  // Create the merged node.
+  SelectionDAG &DAG = DCI.DAG;
+
+  // Build operand list.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(LoMul->getOperand(0));
+  Ops.push_back(LoMul->getOperand(1));
+  Ops.push_back(*LoAdd);
+  Ops.push_back(*HiAdd);
+
+  SDValue MLALNode =  DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
+                                 DAG.getVTList(MVT::i32, MVT::i32),
+                                 &Ops[0], Ops.size());
+
+  // Replace the ADD nodes' uses with the MLAL node's values.
+  SDValue HiMLALResult(MLALNode.getNode(), 1);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+  SDValue LoMLALResult(MLALNode.getNode(), 0);
+  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+
+  // Return the original node to notify the driver to stop replacing.
+  SDValue resNode(AddcNode, 0);
+  return resNode;
+}
+
+/// PerformADDCCombine - Target-specific dag combine transform from
+/// ISD::ADDC, ISD::ADDE, and ISD::UMUL_LOHI/SMUL_LOHI to UMLAL/SMLAL.
+static SDValue PerformADDCCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+
+  return AddCombineTo64bitMLAL(N, DCI, Subtarget);
+
+}
+
 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
 /// operands N0 and N1.
This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted @@ -9047,6 +9248,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 81b1d0323b..66ef07065c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -176,6 +176,9 @@ namespace llvm { VMULLs, // ...signed VMULLu, // ...unsigned + UMLAL, // 64bit Unsigned Accumulate Multiply + SMLAL, // 64bit Signed Accumulate Multiply + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -260,6 +263,11 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual bool isSelectSupported(SelectSupportKind Kind) const { + // ARM does not support scalar condition selects on vectors. + return (Kind != ScalarCondVectorVal); + } + /// getSetCCResultType - Return the value type to use for ISD::SETCC. virtual EVT getSetCCResultType(EVT VT) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 53c8b1715d..697b5b0111 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -83,6 +83,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisInt<0>, SDTCisVT<1, i32>, SDTCisVT<4, i32>]>; + +def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>, + SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >; +def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>; +def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; @@ -98,9 +105,10 @@ def ARMWrapperGOT : SDNode<"ARMISD::WrapperGOT", SDTPtrLeaf>; // @LOCALMOD-END def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , SDT_ARMStructByVal, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, @@ -156,14 +164,16 @@ def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", - SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Setjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", - SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; + SDT_ARMEH_SJLJ_Longjmp, + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; @@ -288,7 +298,7 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - int64_t Value = -(int)N->getZExtValue(); + unsigned Value = -(unsigned)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; @@ -1846,12 +1856,15 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let Inst{15-12} = Rd; let Inst{11-0} = label{11-0}; } + +let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, []>; +} //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -3546,6 +3559,18 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{11-8} = Rm; let Inst{3-0} = Rn; } +class AsMla1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = RdHi; + let Inst{15-12} = RdLo; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} // FIXME: The v5 pseudos are only necessary for the additional Constraint // property. 
Remove them when it's possible to add those properties @@ -3628,14 +3653,14 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), } // Multiply + accumulate -def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, +def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; -def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; +def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64, @@ -3651,17 +3676,22 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), let Inst{3-0} = Rn; } -let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { +let Constraints = "$RLo = $RdLo,$RHi = $RdHi" in { def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], - (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, + pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; +} + +let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, IIC_iMAC64, [], @@ -4133,48 +4163,6 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -// Conditional instructions -multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, - Instruction irsr, - InstrItinClass iii, InstrItinClass iir, - InstrItinClass iis> { - def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, - pred:$p, cc_out:$s), - 4, iis, [], - (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} - -defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : 
AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, - IIC_iBITi, IIC_iBITr, IIC_iBITsr>; - } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 048d340df0..8158a11f83 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1980,7 +1980,7 @@ def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, @@ -2023,7 +2023,7 @@ def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{5}; + let Inst{4} = Rn{4}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt, addrmode6oneL32> { @@ -5045,25 +5045,23 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } + +def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), + GPR:$R, imm:$lane))]>; +def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), + (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "", + [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), + GPR:$R, imm:$lane))]>; } -def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, - (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i8_reg imm:$lane))), - GPR:$src2, (SubReg_i8_lane imm:$lane))), - (DSubReg_i8_reg imm:$lane)))>; -def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, - (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i16_reg imm:$lane))), - GPR:$src2, (SubReg_i16_lane imm:$lane))), - (DSubReg_i16_reg imm:$lane)))>; + def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (DSubReg_i32_reg imm:$lane))), - GPR:$src2, (SubReg_i32_lane imm:$lane))), - (DSubReg_i32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + GPR:$src2, + (SSubReg_f32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 554f6d9f94..e171f8b092 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1200,6 +1200,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>; +let hasSideEffects = 1 in def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 2, IIC_iALUi, []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index a8f754bb4f..2bb667ef37 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -523,6 +523,23 @@ class 
T2MulLong<bits<3> opc22_20, bits<4> opc7_4, let Inst{7-4} = opc7_4; let Inst{3-0} = Rm; } +class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-23} = 0b111110111; + let Inst{22-20} = opc22_20; + let Inst{19-16} = Rn; + let Inst{15-12} = RdLo; + let Inst{11-8} = RdHi; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a @@ -757,33 +774,6 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } - - // Predicated versions. - def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, - pred:$p), - 4, IIC_iALUi, [], - (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, - GPR:$Rn, imm0_4095:$imm, pred:$p)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUr, [], - (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, - GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, - pred:$p, cc_out:$s), 4, IIC_iALUsi, [], - (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, - GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -1200,6 +1190,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let neverHasSideEffects = 1, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>; +let hasSideEffects = 1 in def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), 4, IIC_iALUi, @@ -2437,15 +2428,17 @@ def t2UMULL : T2MulLong<0b010, 0b0000, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2MulLong<0b100, 0b0000, +def t2SMLAL : T2MlaLong<0b100, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; -def t2UMLAL : T2MulLong<0b110, 0b0000, +def t2UMLAL : T2MlaLong<0b110, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), - (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, - "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, + "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">; def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -3049,37 +3042,6 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), RegConstraint<"$false = $Rd">; } // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - // shifted imm - def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, - pred:$p, cc_out:$s), - 4, iii, [], - (iri 
rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // register - def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, - pred:$p, cc_out:$s), - 4, iir, [], - (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; - // shifted register - def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, - pred:$p, cc_out:$s), - 4, iis, [], - (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rfalse = $Rd">; -} // T2I_bincc_irs - -defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, - IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 3f99cce146..254d8f6b7c 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -168,7 +168,7 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); DEBUG(if (F) errs() << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" << F->getName() << "'\n"; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3a5957b241..e1e2f6ea73 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -181,49 +181,44 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods - bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtT2StrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtStWriteBackRegAddrMode3(MCInst &Inst, const 
SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, + void cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtStrdPre(MCInst &Inst, unsigned Opcode, - const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); + void cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, + void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVLDwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVLDwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + void cvtVSTwbFixed(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + void cvtVSTwbRegister(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool processInstruction(MCInst &Inst, @@ -267,6 +262,12 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); + } }; } // end anonymous namespace @@ -3880,8 +3881,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// cvtT2LdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3892,14 +3893,13 @@ cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtT2StrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateReg(0)); @@ -3910,14 +3910,13 @@ cvtT2StrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3926,28 +3925,26 @@ cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3956,14 +3953,13 @@ cvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -3972,57 +3968,53 @@ cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrModeImm12(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode2(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4034,14 +4026,13 @@ cvtLdExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4053,14 +4044,13 @@ cvtLdExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackImm(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. 
Inst.addOperand(MCOperand::CreateImm(0)); @@ -4072,14 +4062,13 @@ cvtStExtTWriteBackImm(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStExtTWriteBackReg(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4091,14 +4080,13 @@ cvtStExtTWriteBackReg(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Rt, Rt2 ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); @@ -4109,14 +4097,13 @@ cvtLdrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtStrdPre - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtStrdPre(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); @@ -4127,40 +4114,27 @@ cvtStrdPre(MCInst &Inst, unsigned Opcode, ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } /// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtLdWriteBackRegAddrMode3(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); - return true; } -/// cvtThumbMultiple- Convert parsed operands to MCInst. +/// cvtThumbMultiply - Convert parsed operands to MCInst. /// Needed here because the Asm Gen Matcher can't handle properly tied operands /// when they refer multiple MIOperands inside a single one. -bool ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, unsigned Opcode, +void ARMAsmParser:: +cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The second source operand must be the same register as the destination - // operand. 
-  if (Operands.size() == 6 &&
-      (((ARMOperand*)Operands[3])->getReg() !=
-       ((ARMOperand*)Operands[5])->getReg()) &&
-      (((ARMOperand*)Operands[3])->getReg() !=
-       ((ARMOperand*)Operands[4])->getReg())) {
-    Error(Operands[3]->getStartLoc(),
-          "destination register must match source register");
-    return false;
-  }
   ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
   ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
   // If we have a three-operand form, make sure to set Rn to be the operand
@@ -4173,12 +4147,10 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
   ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
   Inst.addOperand(Inst.getOperand(0));
   ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
-
-  return true;
 }
 
-bool ARMAsmParser::
-cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst,
               const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Vd
   ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4188,11 +4160,10 @@ cvtVLDwbFixed(MCInst &Inst,
   ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
   // pred
   ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-  return true;
 }
 
-bool ARMAsmParser::
-cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst,
                  const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Vd
   ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
@@ -4204,11 +4175,10 @@ cvtVLDwbRegister(MCInst &Inst,
   ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
   // pred
   ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-  return true;
 }
 
-bool ARMAsmParser::
-cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst,
              const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Create a writeback register dummy placeholder.
   Inst.addOperand(MCOperand::CreateImm(0));
@@ -4218,11 +4188,10 @@ cvtVSTwbFixed(MCInst &Inst,
   ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
   // pred
   ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-  return true;
 }
 
-bool ARMAsmParser::
-cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+void ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst,
                  const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Create a writeback register dummy placeholder.
   Inst.addOperand(MCOperand::CreateImm(0));
@@ -4234,7 +4203,6 @@ cvtVSTwbRegister(MCInst &Inst,
   ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
   // pred
   ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
-  return true;
 }
 
 /// Parse an ARM memory expression, return false if successful else return true
@@ -5377,6 +5345,25 @@ validateInstruction(MCInst &Inst,
                    "in register list");
     break;
   }
+  case ARM::tMUL: {
+    // The second source operand must be the same register as the destination
+    // operand.
+    //
+    // In this case, we must check the parsed operands directly because
+    // cvtThumbMultiply() unconditionally copies the destination register
+    // into the second source operand of the new Inst, so on the converted
+    // Inst the constraint always appears to hold, regardless of what was
+    // actually parsed.
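+    // For example, "muls r0, r1, r0" is accepted here, while
+    // "muls r0, r1, r2" triggers the diagnostic below.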
+ if (Operands.size() == 6 && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[5])->getReg()) && + (((ARMOperand*)Operands[3])->getReg() != + ((ARMOperand*)Operands[4])->getReg())) { + return Error(Operands[3]->getStartLoc(), + "destination register must match source register"); + } + break; + } // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. @@ -7475,9 +7462,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; + unsigned Kind; unsigned ErrorInfo; unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo); + + MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); switch (MatchResult) { default: break; case Match_Success: @@ -7540,9 +7529,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", ((ARMOperand*)Operands[0])->getLocRange()); - case Match_ConversionFail: - // The converter function will have already emitted a diagnostic. - return true; case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index c90751d0b9..57642e1924 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2701,6 +2701,8 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); unsigned size = fieldFromInstruction(Insn, 6, 2); + if (size == 0 && align == 1) + return MCDisassembler::Fail; align *= (1 << size); switch (Inst.getOpcode()) { @@ -2831,6 +2833,8 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, unsigned align = fieldFromInstruction(Insn, 4, 1); if (size == 0x3) { + if (align == 0) + return MCDisassembler::Fail; size = 4; align = 16; } else { @@ -3170,7 +3174,7 @@ static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + Inst.addOperand(MCOperand::CreateImm(imm * 4)); } return MCDisassembler::Success; @@ -3710,8 +3714,16 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0 : + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -3769,8 +3781,16 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED index = fieldFromInstruction(Insn, 7, 1); - if (fieldFromInstruction(Insn, 4, 2) != 0) - align = 4; + + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + align = 4; break; + default: + return MCDisassembler::Fail; + } + break; } if (Rm != 0xF) { // Writeback @@ -4090,8 +4110,15 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << 
fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; @@ -4164,8 +4191,15 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, inc = 2; break; case 2: - if (fieldFromInstruction(Insn, 4, 2)) - align = 4 << fieldFromInstruction(Insn, 4, 2); + switch (fieldFromInstruction(Insn, 4, 2)) { + case 0: + align = 0; break; + case 3: + return MCDisassembler::Fail; + default: + align = 4 << fieldFromInstruction(Insn, 4, 2); break; + } + index = fieldFromInstruction(Insn, 7, 1); if (fieldFromInstruction(Insn, 6, 1)) inc = 2; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 4d922d9b44..7a7ce27d48 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -195,6 +195,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + Type = ELF::R_ARM_THM_JUMP24; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: Type = ELF::R_ARM_MOVT_PREL; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 94f1082b5f..1917564904 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -783,7 +783,7 @@ getT2Imm8s4OpValue(const MCInst &MI, unsigned OpIdx, // Immediate is always encoded as positive. The 'U' bit controls add vs sub. if (Imm8 < 0) - Imm8 = -Imm8; + Imm8 = -(uint32_t)Imm8; // Scaled by 4. 
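   // For example, an offset of -8 becomes imm8 = 2 with the 'U' bit clear.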
Imm8 /= 4; diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 03d5a9ae0c..3396e8b1ef 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -130,8 +130,7 @@ namespace { void printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value >= -(1 << 9) && value <= (1 << 9) - 1) && "Invalid s10 argument"); O << value; @@ -140,8 +139,7 @@ namespace { void printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); + short value = MI->getOperand(OpNo).getImm(); assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); O << value; } diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index c27caeae7d..425371d3e1 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -83,12 +83,10 @@ namespace { return true; } else if (vt == MVT::i32) { int32_t i_val = (int32_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend32<16>(i_val); } else { int64_t i_val = (int64_t) CN->getZExtValue(); - short s_val = (short) i_val; - return i_val == s_val; + return i_val == SignExtend64<16>(i_val); } } @@ -99,9 +97,10 @@ namespace { EVT vt = FPN->getValueType(0); if (vt == MVT::f32) { int val = FloatToBits(FPN->getValueAPF().convertToFloat()); - int sval = (int) ((val << 16) >> 16); - Imm = (short) val; - return val == sval; + if (val == SignExtend32<16>(val)) { + Imm = (short) val; + return true; + } } return false; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 1f2d8accbb..306084bb8c 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp new file mode 100644 index 0000000000..6a37639889 --- /dev/null +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -0,0 +1,874 @@ +//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. 
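+// The Hexagon variant couples node selection to VLIW packet formation: as
+// nodes are picked they are also placed into packets via the target DFA, so
+// resource feasibility feeds directly into the scheduling heuristics.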
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "HexagonMachineScheduler.h" + +#include <queue> + +using namespace llvm; + +static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +/// Decrement this iterator until reaching the top or a non-debug instr. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. +/// +/// FIXME: Adjust SuccSU height based on MinLatency. +void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. +void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +/// +/// FIXME: Adjust PredSU height based on MinLatency. +void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void VLIWMachineScheduler::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void VLIWMachineScheduler::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Advance RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + ++RegionBegin; + + // Update the instruction stream. 
+ BB->splice(InsertPos, BB, MI); + + // Update LiveIntervals + LIS->handleMove(MI); + + // Recede RegionBegin if an instruction moves above the first. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool VLIWMachineScheduler::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + + // For convenience remember the end of the liveness region. + LiveRegionEnd = + (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); +} + +// Setup the register pressure trackers for the top scheduled top and bottom +// scheduled regions. +void VLIWMachineScheduler::initRegPressure() { + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + DEBUG(RPTracker.getPressure().dump(TRI)); + + // Initialize the live ins and live outs. + TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); + BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); + + // Close one end of the tracker so we can call + // getMaxUpward/DownwardPressureDelta before advancing across any + // instructions. This converts currently live regs into live ins/outs. + TopRPTracker.closeTop(); + BotRPTracker.closeBottom(); + + // Account for liveness generated by the region boundary. + if (LiveRegionEnd != RegionEnd) + BotRPTracker.recede(); + + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + + // Cache the list of excess pressure sets in this region. This will also track + // the max pressure in the scheduled code for these sets. + RegionCriticalPSets.clear(); + std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; + for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) + RegionCriticalPSets.push_back(PressureElement(i, 0)); + } + DEBUG(dbgs() << "Excess PSets: "; + for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + dbgs() << TRI->getRegPressureSetName( + RegionCriticalPSets[i].PSetID) << " "; + dbgs() << "\n"); + + // Reset resource state. + TopResourceModel->resetPacketState(); + TopResourceModel->resetDFA(); + BotResourceModel->resetPacketState(); + BotResourceModel->resetDFA(); + TotalPackets = 0; +} + +// FIXME: When the pressure tracker deals in pressure differences then we won't +// iterate over all RegionCriticalPSets[i]. +void VLIWMachineScheduler:: +updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { + for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { + unsigned ID = RegionCriticalPSets[i].PSetID; + int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; + if ((int)NewMaxPressure[ID] > MaxUnits) + MaxUnits = NewMaxPressure[ID]; + } +} + +/// Check if scheduling of this SU is possible +/// in the current packet. 
+/// It is _not_ precise (stateful), it is more like
+/// another heuristic. Many corner cases are figured
+/// empirically.
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+  if (!SU || !SU->getInstr())
+    return false;
+
+  // First see if the pipeline could receive this instruction
+  // in the current cycle.
+  switch (SU->getInstr()->getOpcode()) {
+  default:
+    if (!ResourcesModel->canReserveResources(SU->getInstr()))
+      return false;
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::COPY:
+  case TargetOpcode::INLINEASM:
+    break;
+  }
+
+  // Now check that SU does not depend on any instruction already
+  // in the packet.
+  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+    if (Packet[i]->Succs.size() == 0)
+      continue;
+    for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+         E = Packet[i]->Succs.end(); I != E; ++I) {
+      // Since we do not add pseudos to packets, might as well
+      // ignore order dependencies.
+      if (I->isCtrl())
+        continue;
+
+      if (I->getSUnit() == SU)
+        return false;
+    }
+  }
+  return true;
+}
+
+/// Keep track of available resources.
+void VLIWResourceModel::reserveResources(SUnit *SU) {
+  // If this SU does not fit in the packet,
+  // start a new one.
+  if (!isResourceAvailable(SU)) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    TotalPackets++;
+  }
+
+  switch (SU->getInstr()->getOpcode()) {
+  default:
+    ResourcesModel->reserveResources(SU->getInstr());
+    break;
+  case TargetOpcode::EXTRACT_SUBREG:
+  case TargetOpcode::INSERT_SUBREG:
+  case TargetOpcode::SUBREG_TO_REG:
+  case TargetOpcode::REG_SEQUENCE:
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::COPY:
+  case TargetOpcode::INLINEASM:
+    break;
+  }
+  Packet.push_back(SU);
+
+#ifndef NDEBUG
+  DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
+    DEBUG(dbgs() << "\t[" << i << "] SU(");
+    DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
+  }
+#endif
+
+  // If the packet is now full, reset the state so that in the next cycle
+  // we start fresh.
+  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
+    ResourcesModel->clearResources();
+    Packet.clear();
+    TotalPackets++;
+  }
+}
+
+// Release all DAG roots for scheduling.
+void VLIWMachineScheduler::releaseRoots() {
+  SmallVector<SUnit*, 16> BotRoots;
+
+  for (std::vector<SUnit>::iterator
+         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (I->Preds.empty())
+      SchedImpl->releaseTopNode(&(*I));
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (I->Succs.empty())
+      BotRoots.push_back(&(*I));
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+    SchedImpl->releaseBottomNode(*I);
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
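+/// The loop below repeatedly asks the strategy for a node and a direction,
+/// moves the instruction, and advances the corresponding pressure tracker and
+/// resource model.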
+void VLIWMachineScheduler::schedule() {
+  DEBUG(dbgs()
+        << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
+        << " " << BB->getName()
+        << " in_func " << BB->getParent()->getFunction()->getName()
+        << " at loop depth " << MLI->getLoopDepth(BB)
+        << " \n");
+
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  if (ViewMISchedDAGs) viewGraph();
+
+  SchedImpl->initialize(this);
+
+  // Release edges from the special Entry node or to the special Exit node.
+  releaseSuccessors(&EntrySU);
+  releasePredecessors(&ExitSU);
+
+  // Release all DAG roots for scheduling.
+  releaseRoots();
+
+  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+  CurrentBottom = RegionEnd;
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    if (!checkSchedLimit())
+      break;
+
+    // Move the instruction to its new location in the instruction stream.
+    MachineInstr *MI = SU->getInstr();
+
+    if (IsTopNode) {
+      assert(SU->isTopReady() && "node still has unscheduled dependencies");
+      if (&*CurrentTop == MI)
+        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+      else {
+        moveInstruction(MI, CurrentTop);
+        TopRPTracker.setPos(MI);
+      }
+
+      // Update top scheduled pressure.
+      TopRPTracker.advance();
+      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
+      // Update DFA state.
+      TopResourceModel->reserveResources(SU);
+
+      // Release dependent instructions for scheduling.
+      releaseSuccessors(SU);
+    }
+    else {
+      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+      MachineBasicBlock::iterator priorII =
+        priorNonDebug(CurrentBottom, CurrentTop);
+      if (&*priorII == MI)
+        CurrentBottom = priorII;
+      else {
+        if (&*CurrentTop == MI) {
+          CurrentTop = nextIfDebug(++CurrentTop, priorII);
+          TopRPTracker.setPos(CurrentTop);
+        }
+        moveInstruction(MI, CurrentBottom);
+        CurrentBottom = MI;
+      }
+      // Update bottom scheduled pressure.
+      BotRPTracker.recede();
+      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
+      // Update DFA state.
+      BotResourceModel->reserveResources(SU);
+
+      // Release dependent instructions for scheduling.
+      releasePredecessors(SU);
+    }
+    SU->isScheduled = true;
+    SchedImpl->schedNode(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
+        BotResourceModel->getTotalPackets() << " packets.\n");
+
+  placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void VLIWMachineScheduler::placeDebugValues() {
+  // If the first instruction was a DBG_VALUE, put it back.
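+  // (Debug values were detached before scheduling so they would not occupy
+  // packet slots; DbgValues pairs each one with its original predecessor.)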
+ if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == llvm::prior(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; +} + +void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) { + DAG = dag; + TRI = DAG->TRI; + Top.DAG = dag; + Bot.DAG = dag; + + // Initialize the HazardRecognizers. + const TargetMachine &TM = DAG->MF.getTarget(); + const InstrItineraryData *Itin = TM.getInstrItineraryData(); + Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); +} + +void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + MinLatency) + SU->TopReadyCycle = PredReadyCycle + MinLatency; + } + Top.releaseNode(SU, SU->TopReadyCycle); +} + +void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned MinLatency = I->getMinLatency(); +#ifndef NDEBUG + Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) + SU->BotReadyCycle = SuccReadyCycle + MinLatency; + } + Bot.releaseNode(SU, SU->BotReadyCycle); +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled()) + return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; + + if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth()) + return true; + + return false; +} + +void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. 
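+  // (For example, a node whose ReadyCycle is 5 while CurrCycle is 3 sits in
+  // Pending until bumpCycle() has advanced far enough.)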
+  if (ReadyCycle > CurrCycle || checkHazard(SU))
+    Pending.push(SU);
+  else
+    Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
+  unsigned Width = DAG->getIssueWidth();
+  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+  if (!HazardRec->isEnabled()) {
+    // Bypass HazardRec virtual calls.
+    CurrCycle = NextCycle;
+  }
+  else {
+    // Bypass getHazardType calls in case of long latency.
+    for (; CurrCycle != NextCycle; ++CurrCycle) {
+      if (isTop())
+        HazardRec->AdvanceCycle();
+      else
+        HazardRec->RecedeCycle();
+    }
+  }
+  CheckPending = true;
+
+  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+        << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+
+  // Update the reservation table.
+  if (HazardRec->isEnabled()) {
+    if (!isTop() && SU->isCall) {
+      // Calls are scheduled with their preceding instructions. For bottom-up
+      // scheduling, clear the pipeline state before emitting.
+      HazardRec->Reset();
+    }
+    HazardRec->EmitInstruction(SU);
+  }
+  // Check the instruction group dispatch limit.
+  // TODO: Check if this SU must end a dispatch group.
+  IssueCount += DAG->getNumMicroOps(SU->getInstr());
+  if (IssueCount >= DAG->getIssueWidth()) {
+    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+    bumpCycle();
+  }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
+  // If the available queue is empty, it is safe to reset MinReadyCycle.
+  if (Available.empty())
+    MinReadyCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue. If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+    SUnit *SU = *(Pending.begin()+i);
+    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+    if (ReadyCycle < MinReadyCycle)
+      MinReadyCycle = ReadyCycle;
+
+    if (ReadyCycle > CurrCycle)
+      continue;
+
+    if (checkHazard(SU))
+      continue;
+
+    Available.push(SU);
+    Pending.remove(Pending.begin()+i);
+    --i; --e;
+  }
+  CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
+  if (Available.isInQueue(SU))
+    Available.remove(Available.find(SU));
+  else {
+    assert(Pending.isInQueue(SU) && "bad ready count");
+    Pending.remove(Pending.find(SU));
+  }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
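+/// Callers fall back to the cost-based pickNodeFromQueue() only when this
+/// returns NULL, so trivial regions never pay for the full heuristics.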
+SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && + "permanent hazard"); (void)i; + bumpCycle(); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue &Q, + SUnit *SU, PressureElement P) { + dbgs() << Label << " " << Q.getName() << " "; + if (P.isValid()) + dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease + << " "; + else + dbgs() << " "; + SU->dump(DAG); +} +#endif + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityThree = 50; +static const unsigned ScaleTwo = 10; +static const unsigned FactorOne = 2; + +/// Single point to compute overall scheduling cost. +/// TODO: More heuristics will be used soon. +int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, + SchedCandidate &Candidate, + RegPressureDelta &Delta, + bool verbose) { + // Initial trivial priority. + int ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (!SU || SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Critical path first. + if (Q.getID() == TopQID) + ResCount += (SU->getHeight() * ScaleTwo); + else + ResCount += (SU->getDepth() * ScaleTwo); + + // If resources are available for it, multiply the + // chance of scheduling. + if (DAG->getTopResourceModel()->isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Factor in reg pressure as a heuristic. + ResCount -= (Delta.Excess.UnitIncrease * PriorityThree); + ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree); + + DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")"); + + return ResCount; +} + +/// Pick the best candidate from the top queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: +pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, + SchedCandidate &Candidate) { + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + // BestSU remains NULL if no top candidates beat the best existing candidate. + CandResult FoundCandidate = NoCand; + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + RegPressureDelta RPDelta; + TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + + int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); + + // Initialize the candidate if needed. + if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + continue; + } + + + // Best cost. + if (CurrentCost > Candidate.SCost) { + DEBUG(traceCandidate("CCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = BestCost; + continue; + } + + // Fall through to original instruction order. 
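+    // (On a cost tie the earlier candidate, i.e. the one first in DAG order,
+    // is kept.)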
+ // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. + SchedCandidate TopCand; + CandResult TopResult = pickNodeFromQueue(Top.Available, + DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + + if (TopResult == SingleExcess || TopResult == SingleCritical) { + IsTopNode = true; + return TopCand.SU; + } + // If either Q has a single candidate that minimizes pressure above the + // original region's pressure pick it. + if (BotResult == SingleMax) { + IsTopNode = false; + return BotCand.SU; + } + if (TopResult == SingleMax) { + IsTopNode = true; + return TopCand.SU; + } + if (TopCand.SCost > BotCand.SCost) { + IsTopNode = true; + return TopCand.SU; + } + // Otherwise prefer the bottom candidate in node order. + IsTopNode = false; + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return NULL; + } + SUnit *SU; + if (ForceTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + SchedCandidate TopCand; + CandResult TopResult = + pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + assert(TopResult != NoCand && "failed to find the first candidate"); + (void)TopResult; + SU = TopCand.SU; + } + IsTopNode = true; + } else if (ForceBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + SchedCandidate BotCand; + CandResult BotResult = + pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + (void)BotResult; + SU = BotCand.SU; + } + IsTopNode = false; + } else { + SU = pickNodeBidrectional(IsTopNode); + } + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction in cycle " + << (IsTopNode ? 
Top.CurrCycle : Bot.CurrCycle) << '\n';
+        SU->dump(DAG));
+  return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update its state based on the current cycle before MachineSchedStrategy
+/// does.
+void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU);
+  }
+  else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU);
+  }
+}
+
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
new file mode 100644
index 0000000000..7d8cc3d24e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -0,0 +1,423 @@
+//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom Hexagon MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMACHINESCHEDULER_H
+#define HEXAGONMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+class VLIWMachineScheduler;
+
+/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive the
+/// selected scheduling algorithm.
+///
+/// If this works well and targets wish to reuse VLIWMachineScheduler, we may
+/// expose it in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+  virtual ~MachineSchedStrategy() {}
+
+  /// Initialize the strategy after building the DAG for a new region.
+  virtual void initialize(VLIWMachineScheduler *DAG) = 0;
+
+  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+  /// schedule the node at the top of the unscheduled region. Otherwise it will
+  /// be scheduled at the bottom.
+  virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+  /// Notify MachineSchedStrategy that VLIWMachineScheduler has scheduled a
+  /// node.
+  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
+
+  /// When all predecessor dependencies have been resolved, free this node for
+  /// top-down scheduling.
+  virtual void releaseTopNode(SUnit *SU) = 0;
+  /// When all successor dependencies have been resolved, free this node for
+  /// bottom-up scheduling.
+  virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+
+//===----------------------------------------------------------------------===//
+// ConvergingVLIWScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+  unsigned ID;
+  std::string Name;
+  std::vector<SUnit*> Queue;
+
+public:
+  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+  unsigned getID() const { return ID; }
+
+  StringRef getName() const { return Name; }
+
+  // SU is in this queue if its NodeQueueId is a superset of this ID.
+  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+  bool empty() const { return Queue.empty(); }
+
+  unsigned size() const { return Queue.size(); }
+
+  typedef std::vector<SUnit*>::iterator iterator;
+
+  iterator begin() { return Queue.begin(); }
+
+  iterator end() { return Queue.end(); }
+
+  iterator find(SUnit *SU) {
+    return std::find(Queue.begin(), Queue.end(), SU);
+  }
+
+  void push(SUnit *SU) {
+    Queue.push_back(SU);
+    SU->NodeQueueId |= ID;
+  }
+
+  void remove(iterator I) {
+    (*I)->NodeQueueId &= ~ID;
+    *I = Queue.back();
+    Queue.pop_back();
+  }
+
+  void dump() {
+    dbgs() << Name << ": ";
+    for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+      dbgs() << Queue[i]->NodeNum << " ";
+    dbgs() << "\n";
+  }
+};
+
+/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics to
+/// balance the schedule.
+class ConvergingVLIWScheduler : public MachineSchedStrategy {
+
+  /// Store the state used by ConvergingVLIWScheduler heuristics, required for
+  /// the lifetime of one invocation of pickNode().
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    // Best scheduling cost.
+    int SCost;
+
+    SchedCandidate(): SU(NULL), SCost(0) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
+  enum CandResult {
+    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
+    BestCost};
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the
+  /// state of "hazards" and other interlocks at the current cycle.
+  struct SchedBoundary {
+    VLIWMachineScheduler *DAG;
+
+    ReadyQueue Available;
+    ReadyQueue Pending;
+    bool CheckPending;
+
+    ScheduleHazardRecognizer *HazardRec;
+
+    unsigned CurrCycle;
+    unsigned IssueCount;
+
+    /// MinReadyCycle - Cycle of the soonest available instruction.
+    unsigned MinReadyCycle;
+
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
+    /// Pending queues extend the ready queues with the same ID and the
+    /// PendingFlag set.
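+    /// (e.g. with TopQID == 1 and LogMaxQID == 2, the top pending queue gets
+    /// ID 1 << 2 == 4.)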
+    SchedBoundary(unsigned ID, const Twine &Name):
+      DAG(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
+      CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+    ~SchedBoundary() { delete HazardRec; }
+
+    bool isTop() const {
+      return Available.getID() == ConvergingVLIWScheduler::TopQID;
+    }
+
+    bool checkHazard(SUnit *SU);
+
+    void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+    void bumpCycle();
+
+    void bumpNode(SUnit *SU);
+
+    void releasePending();
+
+    void removeReady(SUnit *SU);
+
+    SUnit *pickOnlyChoice();
+  };
+
+  VLIWMachineScheduler *DAG;
+  const TargetRegisterInfo *TRI;
+
+  // State of the top and bottom scheduled instruction boundaries.
+  SchedBoundary Top;
+  SchedBoundary Bot;
+
+public:
+  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+  enum {
+    TopQID = 1,
+    BotQID = 2,
+    LogMaxQID = 2
+  };
+
+  ConvergingVLIWScheduler():
+    DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+  virtual void initialize(VLIWMachineScheduler *dag);
+
+  virtual SUnit *pickNode(bool &IsTopNode);
+
+  virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+  virtual void releaseTopNode(SUnit *SU);
+
+  virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+  SUnit *pickNodeBidrectional(bool &IsTopNode);
+
+  int SchedulingCost(ReadyQueue &Q,
+                     SUnit *SU, SchedCandidate &Candidate,
+                     RegPressureDelta &Delta, bool verbose);
+
+  CandResult pickNodeFromQueue(ReadyQueue &Q,
+                               const RegPressureTracker &RPTracker,
+                               SchedCandidate &Candidate);
+#ifndef NDEBUG
+  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+                      PressureElement P = PressureElement());
+#endif
+};
+
+class VLIWResourceModel {
+  /// ResourcesModel - Represents VLIW state.
+  /// Not limited to VLIW targets per se, but assumes
+  /// definition of a DFA by the target.
+  DFAPacketizer *ResourcesModel;
+
+  const InstrItineraryData *InstrItins;
+
+  /// Local packet/bundle model. Purely
+  /// internal to the MI scheduler at this time.
+  std::vector<SUnit*> Packet;
+
+  /// Total packets created.
+  unsigned TotalPackets;
+
+public:
+  VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) :
+    InstrItins(IID), TotalPackets(0) {
+    const TargetMachine &TM = C->MF->getTarget();
+    ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, NULL);
+
+    // This hard requirement could be relaxed,
+    // but for now do not let it proceed.
+    assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+    Packet.resize(InstrItins->SchedModel->IssueWidth);
+    Packet.clear();
+    ResourcesModel->clearResources();
+  }
+
+  ~VLIWResourceModel() {
+    delete ResourcesModel;
+  }
+
+  void resetPacketState() {
+    Packet.clear();
+  }
+
+  void resetDFA() {
+    ResourcesModel->clearResources();
+  }
+
+  bool isResourceAvailable(SUnit *SU);
+  void reserveResources(SUnit *SU);
+  unsigned getTotalPackets() const { return TotalPackets; }
+};
+
+class VLIWMachineScheduler : public ScheduleDAGInstrs {
+  /// AA - AliasAnalysis for making memory reference queries.
+  AliasAnalysis *AA;
+
+  RegisterClassInfo *RegClassInfo;
+  MachineSchedStrategy *SchedImpl;
+
+  /// Resource model state, kept separately for the top and bottom sections.
+  VLIWResourceModel *TopResourceModel;
+  VLIWResourceModel *BotResourceModel;
+
+  MachineBasicBlock::iterator LiveRegionEnd;
+
+  /// Register pressure in this region computed by buildSchedGraph.
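+  /// (RPTracker below fills this in during buildSchedGraph; the top/bottom
+  /// trackers are then seeded from its live-in and live-out sets.)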
+ IntervalPressure RegPressure; + RegPressureTracker RPTracker; + + /// List of pressure sets that exceed the target's pressure limit before + /// scheduling, listed in increasing set ID order. Each pressure set is paired + /// with its max pressure in the currently scheduled regions. + std::vector<PressureElement> RegionCriticalPSets; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + IntervalPressure TopPressure; + RegPressureTracker TopRPTracker; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + IntervalPressure BotPressure; + RegPressureTracker BotRPTracker; + +#ifndef NDEBUG + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +#endif + + /// Total packets in the region. + unsigned TotalPackets; + + const MachineLoopInfo *MLI; +public: + VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), + RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), + CurrentBottom(), BotRPTracker(BotPressure), MLI(C->MLI) { + + TopResourceModel = new VLIWResourceModel(C, InstrItins); + BotResourceModel = new VLIWResourceModel(C, InstrItins); + +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + TotalPackets = 0; + } + + virtual ~VLIWMachineScheduler() { + delete SchedImpl; + delete TopResourceModel; + delete BotResourceModel; + } + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's + /// time to do some work. + void schedule(); + + unsigned CurCycle; + + /// Get current register pressure for the top scheduled instructions. + const IntervalPressure &getTopPressure() const { return TopPressure; } + const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } + + /// Get current register pressure for the bottom scheduled instructions. + const IntervalPressure &getBotPressure() const { return BotPressure; } + const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } + + /// Get register pressure for the entire scheduling region before scheduling. + const IntervalPressure &getRegPressure() const { return RegPressure; } + + const std::vector<PressureElement> &getRegionCriticalPSets() const { + return RegionCriticalPSets; + } + + VLIWResourceModel *getTopResourceModel() { return TopResourceModel; } + VLIWResourceModel *getBotResourceModel() { return BotResourceModel; } + + /// getIssueWidth - Return the max instructions per scheduling group. + unsigned getIssueWidth() const { + return (InstrItins && InstrItins->SchedModel) + ? InstrItins->SchedModel->IssueWidth : 1; + } + + /// getNumMicroOps - Return the number of issue slots required for this MI. + unsigned getNumMicroOps(MachineInstr *MI) const { + if (!InstrItins) return 1; + int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? 
UOps : TII->getNumMicroOps(InstrItins, MI); + } + +private: + void scheduleNodeTopDown(SUnit *SU); + void listScheduleTopDown(); + + void initRegPressure(); + void updateScheduledPressure(std::vector<unsigned> NewMaxPressure); + + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + + void releaseRoots(); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); + + void placeDebugValues(); +}; +} // namespace + + +#endif diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 7ece4085ec..1e91c39485 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -337,7 +337,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); #if 0 // for now disable this, if we move NewValueJump before register diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 55cbc094a2..a295015de5 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -109,6 +109,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DenseMap<unsigned, unsigned> PeepholeMap; + DenseMap<unsigned, std::pair<unsigned, unsigned> > PeepholeDoubleRegsMap; if (DisableHexagonPeephole) return false; @@ -117,6 +118,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { MBBb != MBBe; ++MBBb) { MachineBasicBlock* MBB = MBBb; PeepholeMap.clear(); + PeepholeDoubleRegsMap.clear(); // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); @@ -140,6 +142,24 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } } + // Look for this sequence below + // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 + // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg. + // and convert into + // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg. + if (MI->getOpcode() == Hexagon::LSRd_ri) { + assert(MI->getNumOperands() == 3); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src1 = MI->getOperand(1); + MachineOperand &Src2 = MI->getOperand(2); + if (Src2.getImm() != 32) + continue; + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src1.getReg(); + PeepholeDoubleRegsMap[DstReg] = + std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); + } + // Look for P=NOT(P). if (!DisablePNotP && (MI->getOpcode() == Hexagon::NOT_p)) { @@ -178,6 +198,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Change the 1st operand. 
MI->RemoveOperand(1); MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + } else { + DenseMap<unsigned, std::pair<unsigned, unsigned> >::iterator DI = + PeepholeDoubleRegsMap.find(SrcReg); + if (DI != PeepholeDoubleRegsMap.end()) { + std::pair<unsigned,unsigned> PeepholeSrc = DI->second; + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc.first, + false /*isDef*/, + false /*isImp*/, + false /*isKill*/, + false /*isDead*/, + false /*isUndef*/, + false /*isEarlyClobber*/, + PeepholeSrc.second)); + } } } } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2c23674a33..3742486056 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> Moves.push_back(MachineMove(0, Dst, Src)); } +// Get the weight in units of pressure for this register class. +const RegClassWeight & +HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const { + // Each TargetRegisterClass has a per register weight, and weight + // limit which must be less than the limits of its pressure sets. + static const RegClassWeight RCWeightTable[] = { + {1, 32}, // IntRegs + {1, 8}, // CRRegs + {1, 4}, // PredRegs + {2, 16}, // DoubleRegs + {0, 0} }; + return RCWeightTable[RC->getID()]; +} + +/// Get the number of dimensions of register pressure. +unsigned HexagonRegisterInfo::getNumRegPressureSets() const { + return 4; +} + +/// Get the name of this register unit pressure set. +const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const { + static const char *const RegPressureSetName[] = { + "IntRegsRegSet", + "CRRegsRegSet", + "PredRegsRegSet", + "DoubleRegsRegSet" + }; + assert((Idx < 4) && "Index out of bounds"); + return RegPressureSetName[Idx]; +} + +/// Get the register unit pressure limit for this dimension. +/// This limit must be adjusted dynamically for reserved registers. +unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const { + static const int RegPressureLimit [] = { 16, 4, 2, 8 }; + assert((Idx < 4) && "Index out of bounds"); + return RegPressureLimit[Idx]; +} + +const int* +HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC) + const { + static const int RCSetsTable[] = { + 0, -1, // IntRegs + 1, -1, // CRRegs + 2, -1, // PredRegs + 0, -1, // DoubleRegs + -1 }; + static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 }; + unsigned SetListStart = RCSetStartTable[RC->getID()]; + return &RCSetsTable[SetListStart]; +} unsigned HexagonRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 85355ae7be..8820d13e01 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { // Exception handling queries. 
unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; + unsigned getNumRegPressureSets() const; + const char *getRegPressureSetName(unsigned Idx) const; + unsigned getRegPressureSetLimit(unsigned Idx) const; + const int* getRegClassPressureSets(const TargetRegisterClass *RC) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index d1076b8e44..b5ff69a701 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItineraries; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 9b41126ca6..5668ae81e8 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItinerariesV4; + let LoadLatency = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index a7b291ff2a..5688e9cbec 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -14,6 +14,7 @@ #include "HexagonTargetMachine.h" #include "Hexagon.h" #include "HexagonISelLowering.h" +#include "HexagonMachineScheduler.h" #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" @@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops( "disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); +static cl:: +opt<bool> DisableHexagonMISched("disable-hexagon-misched", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget); } +static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { + return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler()); +} + +static MachineSchedRegistry +SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", + createVLIWMachineSched); /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -83,7 +96,13 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + // Enable MI scheduler. 
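+      // (setDefault below makes the VLIW scheduler the variant that the
+      // MachineScheduler pass instantiates for this target.)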
+ if (!DisableHexagonMISched) { + enablePass(&MachineSchedulerID); + MachineSchedRegistry::setDefault(createVLIWMachineSched); + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index a03ed03365..3d5f685028 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -3474,8 +3474,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 1. Two loads unless they are volatile. // 2. Two stores in V4 unless they are volatile. else if ((DepType == SDep::Order) && - !I->hasVolatileMemoryRef() && - !J->hasVolatileMemoryRef()) { + !I->hasOrderedMemoryRef() && + !J->hasOrderedMemoryRef()) { if (QRI->Subtarget.hasV4TOps() && // hexagonv4 allows dual store. MCIDI.mayStore() && MCIDJ.mayStore()) { diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 38fb0e87fd..40594030e5 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -56,6 +56,12 @@ class MBlazeAsmParser : public MCTargetAsmParser { /// } + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, + NumMCOperands); + } public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) @@ -317,10 +323,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { MCInst Inst; - SMLoc ErrorLoc; + unsigned Kind; unsigned ErrorInfo; - switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + switch (MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo)) { default: break; case Match_Success: Out.EmitInstruction(Inst); @@ -329,10 +335,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(IDLoc, "instruction use requires an option to be enabled"); case Match_MnemonicFail: return Error(IDLoc, "unrecognized instruction mnemonic"); - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); - case Match_InvalidOperand: - ErrorLoc = IDLoc; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0U) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -343,6 +347,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(ErrorLoc, "invalid operand for instruction"); } + } llvm_unreachable("Implement any new match types added!"); } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 46f5207a90..daa76e887f 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -140,7 +140,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned oi = i == 2 ? 
1 : 2; - DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + DEBUG(dbgs() << "\nFunction : " << MF.getName() << "\n"; dbgs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 43bd345208..acdd8463f7 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -8,20 +8,36 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MipsRegisterInfo.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace { + class MipsAsmParser : public MCTargetAsmParser { + enum FpFormatTy { + FP_FORMAT_NONE = -1, + FP_FORMAT_S, + FP_FORMAT_D, + FP_FORMAT_L, + FP_FORMAT_W + } FpFormat; + + MCSubtargetInfo &STI; + MCAsmParser &Parser; + #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" @@ -34,14 +50,67 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool parseMathOperation(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseDirective(AsmToken DirectiveID); - OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + MipsAsmParser::OperandMatchResultTy + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + + unsigned + getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands); + + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, + StringRef Mnemonic); + + int tryParseRegister(StringRef Mnemonic); + + bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic); + + bool parseMemOffset(const MCExpr *&Res); + bool parseRelocOperand(const MCExpr *&Res); + MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); + + bool isMips64() const { + return (STI.getFeatureBits() & Mips::FeatureMips64) != 0; + } + + bool isFP64() const { + return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; + } + + int matchRegisterName(StringRef Symbol); + + int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + + void setFpFormat(FpFormatTy Format) { + FpFormat = Format; + } + + void setDefaultFpFormat(); + + void setFpFormat(StringRef Format); + + FpFormatTy getFpFormat() {return FpFormat;} + + bool requestsDoubleOperand(StringRef Mnemonic); + + unsigned getReg(int RC,int RegNo); + public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser() { + : MCTargetAsmParser(), STI(sti), Parser(parser) { + // Initialize the set of available features. 
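+    // (The generated MatchInstructionImpl consults these bits and reports
+    // Match_MissingFeature for instructions the selected CPU does not
+    // support.)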
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + }; } @@ -50,6 +119,7 @@ namespace { /// MipsOperand - Instances of this class represent a parsed Mips machine /// instruction. class MipsOperand : public MCParsedAsmOperand { + enum KindTy { k_CondCode, k_CoprocNum, @@ -61,18 +131,58 @@ class MipsOperand : public MCParsedAsmOperand { } Kind; MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned Base; + const MCExpr *Off; + } Mem; + }; + + SMLoc StartLoc, EndLoc; + public: void addRegOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ - llvm_unreachable("unimplemented!"); + // Add as immediate when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); } + void addImmOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst,Expr); } + void addMemOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("unimplemented!"); + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBase())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst,Expr); } bool isReg() const { return Kind == k_Register; } @@ -82,46 +192,752 @@ public: StringRef getToken() const { assert(Kind == k_Token && "Invalid access!"); - return ""; + return StringRef(Tok.Data, Tok.Length); } unsigned getReg() const { assert((Kind == k_Register) && "Invalid access!"); - return 0; + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; } + unsigned getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + static MipsOperand *CreateToken(StringRef Str, SMLoc S) { + MipsOperand *Op = new MipsOperand(k_Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static MipsOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off, + SMLoc S, SMLoc E) { + MipsOperand *Op = new MipsOperand(k_Memory); + Op->Mem.Base = Base; + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + /// getStartLoc - Get the location of the first token of this operand. 
+ SMLoc getStartLoc() const { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + virtual void print(raw_ostream &OS) const { llvm_unreachable("unimplemented!"); } }; } +unsigned MipsAsmParser:: +getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + assert (0 && "getMCInstOperandNum() not supported by the Mips target."); + // The Mips backend doesn't currently include the matcher implementation, so + // the getMCInstOperandNumImpl() is undefined. This is a temporary + // work around. + NumMCOperands = 0; + return 0; +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { + MCInst Inst; + unsigned ErrorInfo; + unsigned Kind; + unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst, ErrorInfo); + + switch (MatchResult) { + default: break; + case Match_Success: { + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst); + return false; + } + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } return true; } +int MipsAsmParser::matchRegisterName(StringRef Name) { + + int CC = StringSwitch<unsigned>(Name) + .Case("zero", Mips::ZERO) + .Case("a0", Mips::A0) + .Case("a1", Mips::A1) + .Case("a2", Mips::A2) + .Case("a3", Mips::A3) + .Case("v0", Mips::V0) + .Case("v1", Mips::V1) + .Case("s0", Mips::S0) + .Case("s1", Mips::S1) + .Case("s2", Mips::S2) + .Case("s3", Mips::S3) + .Case("s4", Mips::S4) + .Case("s5", Mips::S5) + .Case("s6", Mips::S6) + .Case("s7", Mips::S7) + .Case("k0", Mips::K0) + .Case("k1", Mips::K1) + .Case("sp", Mips::SP) + .Case("fp", Mips::FP) + .Case("gp", Mips::GP) + .Case("ra", Mips::RA) + .Case("t0", Mips::T0) + .Case("t1", Mips::T1) + .Case("t2", Mips::T2) + .Case("t3", Mips::T3) + .Case("t4", Mips::T4) + .Case("t5", Mips::T5) + .Case("t6", Mips::T6) + .Case("t7", Mips::T7) + .Case("t8", Mips::T8) + .Case("t9", Mips::T9) + .Case("at", Mips::AT) + .Case("fcc0", Mips::FCC0) + .Default(-1); + + if (CC != -1) { + //64 bit register in Mips are following 32 bit definitions. 
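+    // For example, Mips::ZERO_64 is defined immediately after Mips::ZERO in + // the generated register enum, so the increment below selects the 64-bit + // counterpart; this relies on the tablegen'd enum keeping such pairs adjacent.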
+ if (isMips64()) + CC++; + return CC; + } + + if (Name[0] == 'f') { + StringRef NumString = Name.substr(1); + unsigned IntVal; + if( NumString.getAsInteger(10, IntVal)) + return -1; //not integer + if (IntVal > 31) + return -1; + + FpFormatTy Format = getFpFormat(); + + if (Format == FP_FORMAT_S || Format == FP_FORMAT_W) + return getReg(Mips::FGR32RegClassID, IntVal); + if (Format == FP_FORMAT_D) { + if(isFP64()) { + return getReg(Mips::FGR64RegClassID, IntVal); + } + //only even numbers available as register pairs + if (( IntVal > 31) || (IntVal%2 != 0)) + return -1; + return getReg(Mips::AFGR64RegClassID, IntVal/2); + } + } + + return -1; +} +void MipsAsmParser::setDefaultFpFormat() { + + if (isMips64() || isFP64()) + FpFormat = FP_FORMAT_D; + else + FpFormat = FP_FORMAT_S; +} + +bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){ + + bool IsDouble = StringSwitch<bool>(Mnemonic.lower()) + .Case("ldxc1", true) + .Case("ldc1", true) + .Case("sdxc1", true) + .Case("sdc1", true) + .Default(false); + + return IsDouble; +} +void MipsAsmParser::setFpFormat(StringRef Format) { + + FpFormat = StringSwitch<FpFormatTy>(Format.lower()) + .Case(".s", FP_FORMAT_S) + .Case(".d", FP_FORMAT_D) + .Case(".l", FP_FORMAT_L) + .Case(".w", FP_FORMAT_W) + .Default(FP_FORMAT_NONE); +} + +unsigned MipsAsmParser::getReg(int RC,int RegNo){ + return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); +} + +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum,StringRef Mnemonic) { + + if (Mnemonic.lower() == "rdhwr") { + //at the moment only hwreg29 is supported + if (RegNum != 29) + return -1; + return Mips::HWR29; + } + + if (RegNum > 31) + return -1; + + return getReg(Mips::CPURegsRegClassID,RegNum); +} + +int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { + const AsmToken &Tok = Parser.getTok(); + int RegNum = -1; + + if (Tok.is(AsmToken::Identifier)) { + std::string lowerCase = Tok.getString().lower(); + RegNum = matchRegisterName(lowerCase); + } else if (Tok.is(AsmToken::Integer)) + RegNum = matchRegisterByNumber(static_cast<unsigned> (Tok.getIntVal()), + Mnemonic.lower()); + else + return RegNum; //error + //64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 + if (isMips64() && RegNum == Mips::ZERO_64) { + if (Mnemonic.find("ddiv") != StringRef::npos) + RegNum = Mips::ZERO; + } + return RegNum; +} + bool MipsAsmParser:: -ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic){ + + SMLoc S = Parser.getTok().getLoc(); + int RegNo = -1; + + //FIXME: we should make a more generic method for CCR + if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") + && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ + RegNo = Parser.getTok().getIntVal(); //get the int value + //at the moment only fcc0 is supported + if (RegNo == 0) + RegNo = Mips::FCC0; + } else + RegNo = tryParseRegister(Mnemonic); + if (RegNo == -1) + return true; + + Operands.push_back(MipsOperand::CreateReg(RegNo, S, + Parser.getTok().getLoc())); + Parser.Lex(); // Eat register token. + return false; +} + +bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, + StringRef Mnemonic) { + //Check if the current operand has a custom associated parser, if so, try to + //custom parse the operand, or fallback to the general approach. 
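+  // MatchOperandParserImpl() is likewise generated from the .td files; it + // dispatches to parseMemOperand() for operands whose definitions name a + // custom ParserMethod.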
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return true; + case AsmToken::Dollar: { + //parse register + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat dollar token. + //parse register operand + if (!tryParseRegisterOperand(Operands,Mnemonic)) { + if (getLexer().is(AsmToken::LParen)) { + //check if it is indexed addressing operand + Operands.push_back(MipsOperand::CreateToken("(", S)); + Parser.Lex(); //eat parenthesis + if (getLexer().isNot(AsmToken::Dollar)) + return true; + + Parser.Lex(); //eat dollar + if (tryParseRegisterOperand(Operands,Mnemonic)) + return true; + + if (!getLexer().is(AsmToken::RParen)) + return true; + + S = Parser.getTok().getLoc(); + Operands.push_back(MipsOperand::CreateToken(")", S)); + Parser.Lex(); + } + return false; + } + //maybe it is a symbol reference + StringRef Identifier; + if (Parser.ParseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier); + + // Otherwise create a symbol ref. + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + + Operands.push_back(MipsOperand::CreateImm(Res, S, E)); + return false; + } + case AsmToken::Identifier: + case AsmToken::LParen: + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::Integer: + case AsmToken::String: { + // quoted label names + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().ParseExpression(IdVal)) + return true; + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + } + case AsmToken::Percent: { + //it is a symbol reference or constant expression + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); //start location of the operand + if (parseRelocOperand(IdVal)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); + return false; + }//case AsmToken::Percent + }//switch(getLexer().getKind()) return true; } +bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { + + Parser.Lex(); //eat % token + const AsmToken &Tok = Parser.getTok(); //get next token, operation + if (Tok.isNot(AsmToken::Identifier)) + return true; + + std::string Str = Tok.getIdentifier().str(); + + Parser.Lex(); //eat identifier + //now make expression from the rest of the operand + const MCExpr *IdVal; + SMLoc EndLoc; + + if (getLexer().getKind() == AsmToken::LParen) { + while (1) { + Parser.Lex(); //eat '(' token + if (getLexer().getKind() == AsmToken::Percent) { + Parser.Lex(); //eat % token + const AsmToken &nextTok = Parser.getTok(); + if (nextTok.isNot(AsmToken::Identifier)) + return true; + Str += "(%"; + Str += nextTok.getIdentifier(); + Parser.Lex(); //eat identifier + if (getLexer().getKind() != AsmToken::LParen) + return true; + } else + break; + } + if (getParser().ParseParenExpression(IdVal,EndLoc)) + return true; + + while (getLexer().getKind() == 
AsmToken::RParen) + Parser.Lex(); //eat ')' token + + } else + return true; //parenthesis must follow reloc operand + + //Check the type of the expression + if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { + //it's a constant, evaluate lo or hi value + int Val = MCE->getValue(); + if (Str == "lo") { + Val = Val & 0xffff; + } else if (Str == "hi") { + Val = (Val & 0xffff0000) >> 16; + } + Res = MCConstantExpr::Create(Val, getContext()); + return false; + } + + if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { + //it's a symbol, create symbolic expression from symbol + StringRef Symbol = MSRE->getSymbol().getName(); + MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); + Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); + return false; + } + return true; +} + +bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + + StartLoc = Parser.getTok().getLoc(); + RegNo = tryParseRegister(""); + EndLoc = Parser.getTok().getLoc(); + return (RegNo == (unsigned)-1); +} + +bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { + + SMLoc S; + + switch(getLexer().getKind()) { + default: + return true; + case AsmToken::Integer: + case AsmToken::Minus: + case AsmToken::Plus: + return (getParser().ParseExpression(Res)); + case AsmToken::Percent: + return parseRelocOperand(Res); + case AsmToken::LParen: + return false; //it's probably assuming 0 + } + return true; +} + +MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( + SmallVectorImpl<MCParsedAsmOperand*>&Operands) { + + const MCExpr *IdVal = 0; + SMLoc S; + //first operand is the offset + S = Parser.getTok().getLoc(); + + if (parseMemOffset(IdVal)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); //get next token + if (Tok.isNot(AsmToken::LParen)) { + Error(Parser.getTok().getLoc(), "'(' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat '(' token. + + const AsmToken &Tok1 = Parser.getTok(); //get next token + if (Tok1.is(AsmToken::Dollar)) { + Parser.Lex(); // Eat '$' token. + if (tryParseRegisterOperand(Operands,"")) { + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return MatchOperand_ParseFail; + } + + } else { + Error(Parser.getTok().getLoc(),"unexpected token in operand"); + return MatchOperand_ParseFail; + } + + const AsmToken &Tok2 = Parser.getTok(); //get next token + if (Tok2.isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "')' expected"); + return MatchOperand_ParseFail; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Parser.Lex(); // Eat ')' token. 
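+  // An operand such as "8($sp)" has now left IdVal holding the offset + // expression (8) and a register operand for $sp at the back of Operands; + // they are folded into a single memory operand below.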
+ + if (IdVal == 0) + IdVal = MCConstantExpr::Create(0, getContext()); + + //now replace register operand with the mem operand + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + int RegNo = op->getReg(); + //remove register from operands + Operands.pop_back(); + //and add memory operand + Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E)); + delete op; + return MatchOperand_Success; +} + +MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { + + MCSymbolRefExpr::VariantKind VK + = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol) + .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI) + .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO) + .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL) + .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL) + .Case("got", MCSymbolRefExpr::VK_Mips_GOT) + .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD) + .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM) + .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI) + .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO) + .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL) + .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI) + .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO) + .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP) + .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE) + .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST) + .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI) + .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO) + .Default(MCSymbolRefExpr::VK_None); + + return VK; +} + +int ConvertCcString(StringRef CondString){ + + int CC = StringSwitch<unsigned>(CondString) + .Case(".f", 0) + .Case(".un", 1) + .Case(".eq", 2) + .Case(".ueq", 3) + .Case(".olt", 4) + .Case(".ult", 5) + .Case(".ole", 6) + .Case(".ule", 7) + .Case(".sf", 8) + .Case(".ngle", 9) + .Case(".seq", 10) + .Case(".ngl", 11) + .Case(".lt", 12) + .Case(".nge", 13) + .Case(".le", 14) + .Case(".ngt", 15) + .Default(-1); + + return CC; +} + +bool MipsAsmParser:: +parseMathOperation(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + //split the format + size_t Start = Name.find('.'), Next = Name.rfind('.'); + StringRef Format1 = Name.slice(Start, Next); + //and add the first format to the operands + Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc)); + //now for the second format + StringRef Format2 = Name.slice(Next, StringRef::npos); + Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc)); + + //set the format for the first register + setFpFormat(Format1); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + if (getLexer().isNot(AsmToken::Comma)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + + } + Parser.Lex(); // Eat the comma. + + //set the format for the first register + setFpFormat(Format2); + + // Parse and remember the operand. 
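+      // Format2 is active now, so a register such as $f12 in this second + // operand resolves against the FP register class implied by that suffix.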
+ if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; +} + bool MipsAsmParser:: ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - return true; + //floating point instructions: should register be treated as double? + if (requestsDoubleOperand(Name)) { + setFpFormat(FP_FORMAT_D); + Operands.push_back(MipsOperand::CreateToken(Name, NameLoc)); + } + else { + setDefaultFpFormat(); + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Mnemonic = Name.slice(Start, Next); + + Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc)); + + if (Next != StringRef::npos) { + //there is a format token in mnemonic + //StringRef Rest = Name.slice(Next, StringRef::npos); + size_t Dot = Name.find('.', Next+1); + StringRef Format = Name.slice(Next, Dot); + if (Dot == StringRef::npos) //only one '.' in a string, it's a format + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + else { + if (Name.startswith("c.")){ + // floating point compare, add '.' and immediate represent for cc + Operands.push_back(MipsOperand::CreateToken(".", NameLoc)); + int Cc = ConvertCcString(Format); + if (Cc == -1) { + return Error(NameLoc, "Invalid conditional code"); + } + SMLoc E = SMLoc::getFromPointer( + Parser.getTok().getLoc().getPointer() -1 ); + Operands.push_back(MipsOperand::CreateImm( + MCConstantExpr::Create(Cc, getContext()), NameLoc, E)); + } else { + //trunc, ceil, floor ... + return parseMathOperation(Name, NameLoc, Operands); + } + + //the rest is a format + Format = Name.slice(Dot, StringRef::npos); + Operands.push_back(MipsOperand::CreateToken(Format, NameLoc)); + } + + setFpFormat(Format); + } + } + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma) ) { + Parser.Lex(); // Eat the comma. + + // Parse and remember the operand. 
+ if (ParseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } + + Parser.Lex(); // Consume the EndOfStatement + return false; } bool MipsAsmParser:: ParseDirective(AsmToken DirectiveID) { - return true; -} -MipsAsmParser::OperandMatchResultTy MipsAsmParser:: - parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) { - return MatchOperand_ParseFail; + if (DirectiveID.getString() == ".ent") { + //ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".end") { + //ignore this directive for now + Parser.Lex(); + return false; + } + + if (DirectiveID.getString() == ".frame") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".set") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".fmask") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".mask") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + if (DirectiveID.getString() == ".gpword") { + //ignore this directive for now + Parser.EatToEndOfStatement(); + return false; + } + + return true; } extern "C" void LLVMInitializeMipsAsmParser() { @@ -130,3 +946,7 @@ extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target); RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget); } + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "MipsGenAsmMatcher.inc" diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index f535c504db..0f84358e26 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp + MipsDirectObjLower.cpp MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 234455e0c7..96033276d2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -122,7 +122,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: + case Mips::D0: case Mips::FCC0: return 0; case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: return 1; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index b8489cac55..5d240fe847 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -56,7 +56,7 @@ namespace { MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - /*HasRelocationAddend*/ false, + /*HasRelocationAddend*/ (_isN64) ? 
true : false, /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 8e198d3dac..1d7370a04f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -143,7 +143,11 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions"); + + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && + "getBranchTargetOpValue expects only expressions or immediates"); const MCExpr *Expr = MO.getExpr(); Fixups.push_back(MCFixup::Create(0, Expr, @@ -159,13 +163,10 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - - /* LOCALMOD-START */ // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return (unsigned)MO.getImm() / 4; - /* LOCALMOD-END */ - - assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions"); + if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); const MCExpr *Expr = MO.getExpr(); Fixups.push_back(MCFixup::Create(0, Expr, diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 2bc286b6bb..ec84ad81f5 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), - RI(*tm.getSubtargetImpl(), *this) {} + RI(*tm.getSubtargetImpl()) {} const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { return RI; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index c15d1bf52e..106e82fd38 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -38,9 +38,8 @@ using namespace llvm; -Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST) + : MipsRegisterInfo(ST) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 3f4b3a762a..c702a15f60 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -17,11 +17,11 @@ #include "MipsRegisterInfo.h" namespace llvm { +class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + Mips16RegisterInfo(const MipsSubtarget &Subtarget); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 20fc178077..147be5db15 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -110,9 +110,9 @@ def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; let 
Pattern = []<dag> in { -def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; -def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; -def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; + def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; + def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; + def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; } } // Rotate Instructions @@ -217,7 +217,15 @@ let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; +let Pattern = []<dag> in { + def DEXTU : ExtBase<2, "dextu", CPU64Regs>; + def DEXTM : ExtBase<1, "dextm", CPU64Regs>; +} def DINS : InsBase<7, "dins", CPU64Regs>; +let Pattern = []<dag> in { + def DINSU : InsBase<6, "dinsu", CPU64Regs>; + def DINSM : InsBase<5, "dinsm", CPU64Regs>; +} let isCodeGenOnly = 1, rs = 0, shamt = 0 in { def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp index dc8fbd0d03..99b163ec33 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -91,7 +91,7 @@ void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) { // Sign-extend and shift operand of ADDiu and see if it still fits in 16-bit. int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd); - int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16); + int64_t ShiftedImm = (uint64_t)Imm << (Seq[1].ImmOpnd - 16); if (!isInt<16>(ShiftedImm)) return; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index bf4dbcb07f..01dd8b5bb5 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "mips-asm-printer" #include "Mips.h" #include "MipsAsmPrinter.h" +#include "MipsDirectObjLower.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "InstPrinter/MipsInstPrinter.h" @@ -58,33 +59,31 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Direct object specific instruction lowering - if (!OutStreamer.hasRawTextSupport()) - switch (MI->getOpcode()) { - case Mips::DSLL: - case Mips::DSRL: - case Mips::DSRA: - assert(MI->getNumOperands() == 3 && - "Invalid no. 
of machine operands for shift!"); - assert(MI->getOperand(2).isImm()); - int64_t Shift = MI->getOperand(2).getImm(); - if (Shift > 31) { - MCInst TmpInst0; - MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32); - OutStreamer.EmitInstruction(TmpInst0); - return; - } - break; - } - MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { MCInst TmpInst0; MCInstLowering.Lower(I++, TmpInst0); + + // Direct object specific instruction lowering + if (!OutStreamer.hasRawTextSupport()) { + switch (TmpInst0.getOpcode()) { + // If the shift amount is >= 32, the inst needs to be lowered further + case Mips::DSLL: + case Mips::DSRL: + case Mips::DSRA: + Mips::LowerLargeShift(TmpInst0); + break; + // Double extract instruction is chosen by pos and size operands + case Mips::DEXT: + case Mips::DINS: + Mips::LowerDextDins(TmpInst0); + } + } + OutStreamer.EmitInstruction(TmpInst0); - } while ((I != E) && I->isInsideBundle()); + } while ((I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index cb7022b9e2..543329562a 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -139,7 +138,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { do { DEBUG(errs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 2bba8a3802..e3c8ed75cf 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -30,10 +30,11 @@ STATISTIC(FilledSlots, "Number of delay slots filled"); STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" " are not NOP."); -static cl::opt<bool> EnableDelaySlotFiller( - "enable-mips-delay-filler", +static cl::opt<bool> DisableDelaySlotFiller( + "disable-mips-delay-filler", cl::init(false), - cl::desc("Fill the Mips delay slots useful instructions."), + cl::desc("Disable the delay slot filler, which attempts to fill the Mips " + "delay slots with useful instructions."), cl::Hidden); // This option can be used to silence complaints by machine verifier passes. @@ -114,7 +115,9 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { InstrIter D; - if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { + // Delay slot filling is disabled at -O0. + if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && + findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); ++UsefulSlots; } else diff --git a/lib/Target/Mips/MipsDirectObjLower.cpp b/lib/Target/Mips/MipsDirectObjLower.cpp new file mode 100644 index 0000000000..0d74db808c --- /dev/null +++ b/lib/Target/Mips/MipsDirectObjLower.cpp @@ -0,0 +1,86 @@ +//===-- MipsDirectObjLower.cpp - Mips LLVM direct object lowering -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Mips MCInst records that are normally +// left to the assembler to lower such as large shifts. +// +//===----------------------------------------------------------------------===// +#include "MipsDirectObjLower.h" +#include "MipsInstrInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// If the D<shift> instruction has a shift amount that is greater +// than 31 (checked in calling routine), lower it to a D<shift>32 instruction +void Mips::LowerLargeShift(MCInst& Inst) { + + assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!"); + assert(Inst.getOperand(2).isImm()); + + bool isLarge = false; + int64_t Shift; + Shift = Inst.getOperand(2).getImm(); + if (Shift > 31) { + Shift -= 32; + isLarge = true; + } + + // saminus32 + (Inst.getOperand(2)).setImm(Shift); + + if (isLarge) + switch (Inst.getOpcode()) { + default: + // Calling function is not synchronized + llvm_unreachable("Unexpected shift instruction"); + case Mips::DSLL: + Inst.setOpcode(Mips::DSLL32); + return; + case Mips::DSRL: + Inst.setOpcode(Mips::DSRL32); + return; + case Mips::DSRA: + Inst.setOpcode(Mips::DSRA32); + return; + } +} + +// Pick a DEXT or DINS instruction variant based on the pos and size operands +void Mips::LowerDextDins(MCInst& InstIn) { + int Opcode = InstIn.getOpcode(); + + if (Opcode == Mips::DEXT) + assert(InstIn.getNumOperands() == 4 && + "Invalid no. of machine operands for DEXT!"); + else // Only DEXT and DINS are possible + assert(InstIn.getNumOperands() == 5 && + "Invalid no. of machine operands for DINS!"); + + assert(InstIn.getOperand(2).isImm()); + int64_t pos = InstIn.getOperand(2).getImm(); + assert(InstIn.getOperand(3).isImm()); + int64_t size = InstIn.getOperand(3).getImm(); + + if (size <= 32) { + if ((pos < 32)) { // DEXT/DINS, do nothing + return; + } else { // DEXTU/DINSU + InstIn.getOperand(2).setImm(pos - 32); + InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTU : Mips::DINSU); + return; + } + } else { // DEXTM/DINSM + assert(pos < 32 && "DEXT/DINS cannot have both size and pos > 32"); + InstIn.getOperand(3).setImm(size - 32); + InstIn.setOpcode((Opcode == Mips::DEXT) ? Mips::DEXTM : Mips::DINSM); + return; + } +} diff --git a/lib/Target/Mips/MipsDirectObjLower.h b/lib/Target/Mips/MipsDirectObjLower.h new file mode 100644 index 0000000000..8813cc9ac7 --- /dev/null +++ b/lib/Target/Mips/MipsDirectObjLower.h @@ -0,0 +1,28 @@ +//===-- MipsDirectObjLower.h - Mips LLVM direct object lowering *- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSDIRECTOBJLOWER_H +#define MIPSDIRECTOBJLOWER_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCInst; + class MCStreamer; + + namespace Mips { + /// MipsDirectObjLower - This name space is used to lower MCInstr in cases + // where the assembler usually finishes the lowering + // such as large shifts. 
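+    // + // For example, a "dsll $1, $2, 34" whose shift amount does not fit the + // 5-bit field is re-encoded as "dsll32 $1, $2, 2", and a DINS with + // pos >= 32 and size <= 32 becomes a DINSU with pos - 32.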
+ void LowerLargeShift(MCInst &Inst); + void LowerDextDins(MCInst &Inst); + } +} + +#endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 2fdea15e65..edcf8e531b 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -337,8 +337,9 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 7e9ae7dfba..146dbea15c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1583,15 +1583,15 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); - MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); + const MipsTargetObjectFile &TLOF = (const MipsTargetObjectFile&)getObjFileLowering(); // %gp_rel relocation if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL); SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); - SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); - return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); + SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32); + return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode); } // %hi/%lo relocation SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 50e3eb534e..8ade891ab5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -262,46 +262,3 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } } - -unsigned -llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, - MachineBasicBlock& MBB, - MachineBasicBlock::iterator II, DebugLoc DL, - bool LastInstrIsADDiu, - MipsAnalyzeImmediate::Inst *LastInst) { - MipsAnalyzeImmediate AnalyzeImm; - unsigned Size = IsN64 ? 64 : 32; - unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; - unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - - if (LastInst && (Seq.size() == 1)) { - *LastInst = *Inst; - return 0; - } - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq. Skip the last instruction if - // LastInst is not 0. 
- for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - if (LastInst) - *LastInst = *Inst; - - return Seq.size() - !!LastInst; -} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7d56259062..aca2bc7ae9 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -88,18 +88,6 @@ private: const SmallVectorImpl<MachineOperand>& Cond) const; }; -namespace Mips { - /// Emit a series of instructions to load an immediate. All instructions - /// except for the last one are emitted. The function returns the number of - /// MachineInstrs generated. The opcode-immediate pair of the last - /// instruction is returned in LastInst, if it is not 0. - unsigned - loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, - MachineBasicBlock& MBB, MachineBasicBlock::iterator II, - DebugLoc DL, bool LastInstrIsADDiu, - MipsAnalyzeImmediate::Inst *LastInst); -} - /// Create MipsInstrInfo objects. const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8703eed1cc..f63b143bdd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -74,9 +74,10 @@ def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; // MAdd*/MSub* nodes def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, @@ -110,7 +111,7 @@ def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>; def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, [SDNPHasChain, SDNPInGlue]>; -def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; +def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>; def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; @@ -1112,6 +1113,26 @@ def EXT : ExtBase<0, "ext", CPURegs>; def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; +def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; +def : InstAlias<"addu $rs,$rt,$imm", + (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"add $rs,$rt,$imm", + (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"and $rs,$rt,$imm", + (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"j $rs", (JR CPURegs:$rs)>; +def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; +def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; +def : InstAlias<"slt $rs,$rt,$imm", + (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"xor $rs,$rt,$imm", + (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; + 
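+// With the aliases above the assembler accepts common pseudo-forms directly: + // "move $4, $5" assembles as "add $4, $5, $zero" and "not $2, $3" as + // "nor $2, $3, $zero". + 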
+//===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index f78203f705..b9dbd522b7 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -10,6 +10,10 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // +// FIXME: +// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. +// 2. If program has inline assembly statements whose size cannot be +// determined accurately, load branch target addresses from the GOT. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-long-branch" @@ -48,7 +52,7 @@ namespace { typedef MachineBasicBlock::reverse_iterator ReverseIter; struct MBBInfo { - uint64_t Size; + uint64_t Size, Address; bool HasLongBranch; MachineInstr *Br; @@ -61,7 +65,10 @@ namespace { static char ID; MipsLongBranch(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), - TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {} + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), + ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {} virtual const char *getPassName() const { return "Mips Long Branch"; @@ -81,6 +88,9 @@ namespace { const MipsInstrInfo *TII; MachineFunction *MF; SmallVector<MBBInfo, 16> MBBInfos; + bool IsPIC; + unsigned ABI; + unsigned LongBranchSeqSize; }; char MipsLongBranch::ID = 0; @@ -230,12 +240,6 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, // Expand branch instructions to long branches. void MipsLongBranch::expandToLongBranch(MBBInfo &I) { - I.HasLongBranch = true; - - bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; - unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI(); - bool N64 = ABI == MipsSubtarget::N64; - MachineBasicBlock::iterator Pos; MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br); DebugLoc DL = I.Br->getDebugLoc(); @@ -248,101 +252,105 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { MBB->addSuccessor(LongBrMBB); if (IsPIC) { - // $longbr: - // addiu $sp, $sp, -regsize * 2 - // sw $ra, 0($sp) - // bal $baltgt - // sw $a3, regsize($sp) - // $baltgt: - // lui $a3, %hi($baltgt) - // lui $at, %hi($tgt) - // addiu $a3, $a3, %lo($baltgt) - // addiu $at, $at, %lo($tgt) - // subu $at, $at, $a3 - // addu $at, $ra, $at - // - // if n64: - // lui $a3, %highest($baltgt) - // lui $ra, %highest($tgt) - // addiu $a3, $a3, %higher($baltgt) - // addiu $ra, $ra, %higher($tgt) - // dsll $a3, $a3, 32 - // dsll $ra, $ra, 32 - // subu $at, $at, $a3 - // addu $at, $at, $ra - // - // lw $ra, 0($sp) - // lw $a3, regsize($sp) - // jr $at - // addiu $sp, $sp, regsize * 2 - // $fallthrough: - // - MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); - int RegSize = N64 ? 8 : 4; - unsigned AT = N64 ? Mips::AT_64 : Mips::AT; - unsigned A3 = N64 ? Mips::A3_64 : Mips::A3; - unsigned SP = N64 ? Mips::SP_64 : Mips::SP; - unsigned RA = N64 ? 
Mips::RA_64 : Mips::RA; - unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; - unsigned Store = N64 ? Mips::SD_P8 : Mips::SW; - unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi; - unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; - unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; - unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu; - unsigned JR = N64 ? Mips::JR64 : Mips::JR; - - Pos = LongBrMBB->begin(); - - BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(-RegSize * 2); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP) - .addImm(0); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP) - .addImm(RegSize)->setIsInsideBundle(); - - Pos = BalTgtMBB->begin(); - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT) - .addMBB(TgtMBB, MipsII::MO_ABS_HI); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT) - .addMBB(TgtMBB, MipsII::MO_ABS_LO); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT); - - if (N64) { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA) - .addMBB(TgtMBB, MipsII::MO_HIGHEST); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) - .addMBB(BalTgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA) - .addMBB(TgtMBB, MipsII::MO_HIGHER); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA) - .addImm(32); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA); - I.Size += 4 * 8; + int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address; + int64_t Offset = TgtAddress - (I.Address + I.Size - 20); + int64_t Lo = SignExtend64<16>(Offset & 0xffff); + int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff); + + if (ABI != MipsSubtarget::N64) { + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // bal $baltgt + // lui $at, %hi($tgt - $baltgt) + // $baltgt: + // addiu $at, $at, %lo($tgt - $baltgt) + // addu $at, $ra, $at + // lw $ra, 0($sp) + // jr $at + // addiu $sp, $sp, 8 + // $fallthrough: + // + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(-8); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi) + ->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) + .addReg(Mips::AT).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT) + .addReg(Mips::RA).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) + .addReg(Mips::SP).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8)->setIsInsideBundle(); + } else { + // $longbr: + // daddiu $sp, $sp, -16 + 
// sd $ra, 0($sp) + // lui64 $at, %highest($tgt - $baltgt) + // daddiu $at, $at, %higher($tgt - $baltgt) + // dsll $at, $at, 16 + // daddiu $at, $at, %hi($tgt - $baltgt) + // bal $baltgt + // dsll $at, $at, 16 + // $baltgt: + // daddiu $at, $at, %lo($tgt - $baltgt) + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // jr64 $at + // daddiu $sp, $sp, 16 + // $fallthrough: + // + + int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff); + int64_t Highest = + SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff); + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(-16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64) + .addImm(Highest); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Higher); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Hi); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) + .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) + .addReg(Mips::AT_64).addImm(Lo); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64) + .addReg(Mips::RA_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) + .addReg(Mips::SP_64).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle(); } - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) - .addImm(RegSize * 2)->setIsInsideBundle(); - I.Size += 4 * 14; } else { // $longbr: // j $tgt @@ -353,7 +361,6 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); - I.Size += 4 * 2; } if (I.Br->isUnconditionalBranch()) { @@ -401,19 +408,36 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { if (!I->Br || I->HasLongBranch) continue; - if (!ForceLongBranch) - // Check if offset fits into 16-bit immediate field of branches. - if (isInt<16>(computeOffset(I->Br) / 4)) - continue; + // Check if offset fits into 16-bit immediate field of branches. + if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4)) + continue; - expandToLongBranch(*I); + I->HasLongBranch = true; + I->Size += LongBranchSeqSize * 4; ++LongBranches; EverMadeChange = MadeChange = true; } } - if (EverMadeChange) - MF->RenumberBlocks(); + if (!EverMadeChange) + return true; + + // Compute basic block addresses. + if (TM.getRelocationModel() == Reloc::PIC_) { + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + + uint64_t Address = 0; + + for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I) + I->Address = Address; + } + + // Do the expansion. 
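+  // The block addresses computed above must be final before expansion: + // expandToLongBranch() reads them to materialize the "$tgt - $baltgt" + // difference directly, instead of leaving it to relocations.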
+ for (I = MBBInfos.begin(); I != E; ++I) + if (I->HasLongBranch) + expandToLongBranch(*I); + + MF->RenumberBlocks(); return true; } diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 86778a2167..1d108ab192 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -11,7 +11,6 @@ // MCInst records. // //===----------------------------------------------------------------------===// - #include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" @@ -161,34 +160,6 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -// If the D<shift> instruction has a shift amount that is greater -// than 31 (checked in calling routine), lower it to a D<shift>32 instruction -void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI, - MCInst& Inst, - int64_t Shift) { - // rt - Inst.addOperand(LowerOperand(MI->getOperand(0))); - // rd - Inst.addOperand(LowerOperand(MI->getOperand(1))); - // saminus32 - Inst.addOperand(MCOperand::CreateImm(Shift)); - - switch (MI->getOpcode()) { - default: - // Calling function is not synchronized - llvm_unreachable("Unexpected shift instruction"); - break; - case Mips::DSLL: - Inst.setOpcode(Mips::DSLL32); - break; - case Mips::DSRL: - Inst.setOpcode(Mips::DSRL32); - break; - case Mips::DSRA: - Inst.setOpcode(Mips::DSRA32); - break; - } -} MCInst Instr4, Mask1, Mask2; // @LOCALMOD // @LOCALMOD-START diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 0abb996a68..3eab5a452e 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -33,7 +33,6 @@ public: MipsMCInstLower(MipsAsmPrinter &asmprinter); void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 150a2bd7f8..dafe14519a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/Constants.h" #include "llvm/DebugInfo.h" #include "llvm/Type.h" -#include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -43,9 +42,8 @@ using namespace llvm; -MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &tii) - : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) + : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {} unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } @@ -132,6 +130,12 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::RA_64); } + // Reserve GP if small section is used. 
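+  // Small-section data is addressed %gp_rel, i.e. relative to GP, so GP + // must keep its ABI value for the whole function and may not be handed + // out by the register allocator.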
+  if (Subtarget.useSmallSection()) {
+    Reserved.set(Mips::GP);
+    Reserved.set(Mips::GP_64);
+  }
+
   return Reserved;
 }
@@ -161,7 +165,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
            "Instr doesn't have FrameIndex operand!");
   }
-  DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+  DEBUG(errs() << "\nFunction : " << MF.getName() << "\n";
         errs() << "<--------->\n" << MI);
   int FrameIndex = MI.getOperand(i).getIndex();
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9a05e94be9..78adf7f18b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -22,16 +22,14 @@
 namespace llvm {
 class MipsSubtarget;
-class TargetInstrInfo;
 class Type;
 class MipsRegisterInfo : public MipsGenRegisterInfo {
 protected:
   const MipsSubtarget &Subtarget;
-  const TargetInstrInfo &TII;
 public:
-  MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
+  MipsRegisterInfo(const MipsSubtarget &Subtarget);
   /// getRegisterNumbering - Given the enum value for some register, e.g.
   /// Mips::RA, return the number that it corresponds to (e.g. 31).
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index eeb1de36ef..e4b44efd81 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -260,14 +260,53 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
   if (isInt<16>(Amount))// addi sp, sp, amount
     BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
   else { // Expand immediate that doesn't fit in 16-bit.
-    unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
-    MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT();
-    Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0);
-    BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg);
+    unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0);
+    BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg);
   }
 }
+/// This function generates the sequence of instructions needed to load the
+/// immediate Imm into register AT, and returns that register.
+unsigned
+MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator II, DebugLoc DL,
+                               unsigned *NewImm) const {
+  MipsAnalyzeImmediate AnalyzeImm;
+  const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
+  unsigned Size = STI.isABI_N64() ? 64 : 32;
+  unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
+  unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+  unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+  bool LastInstrIsADDiu = NewImm;
+
+  const MipsAnalyzeImmediate::InstSeq &Seq =
+    AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
+  MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+  assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1)));
+
+  // The first instruction can be a LUi, which is different from other
+  // instructions (ADDiu, ORI and SLL) in that it does not have a register
+  // operand.
+  if (Inst->Opc == LUi)
+    BuildMI(MBB, II, DL, get(LUi), ATReg)
+      .addImm(SignExtend64<16>(Inst->ImmOpnd));
+  else
+    BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ZEROReg)
+      .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+  // Build the remaining instructions in Seq.
+ for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) + BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInstrIsADDiu) + *NewImm = Inst->ImmOpnd; + + return ATReg; +} + unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 346e74dba4..55b78b2cfb 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -15,7 +15,6 @@ #define MIPSSEINSTRUCTIONINFO_H #include "MipsInstrInfo.h" -#include "MipsAnalyzeImmediate.h" #include "MipsSERegisterInfo.h" namespace llvm { @@ -70,6 +69,13 @@ public: void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + /// Emit a series of instructions to load an immediate. If NewImm is a + /// non-NULL parameter, the last instruction is not emitted, but instead + /// its immediate operand is returned in NewImm. + unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 043a1ef683..d868f73758 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -40,8 +40,8 @@ using namespace llvm; MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, - const TargetInstrInfo &TII) - : MipsRegisterInfo(ST, TII) {} + const MipsSEInstrInfo &I) + : MipsRegisterInfo(ST), TII(I) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions @@ -122,15 +122,14 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ATReg = Subtarget.isABI_N64() ? 
Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); + unsigned NewImm; MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(Reg); FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); + Offset = SignExtend64<16>(NewImm); } MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 4b17b33e9a..b4eab65522 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -18,11 +18,14 @@ #include "MipsRegisterInfo.h" namespace llvm { +class MipsSEInstrInfo; class MipsSERegisterInfo : public MipsRegisterInfo { + const MipsSEInstrInfo &TII; + public: MipsSERegisterInfo(const MipsSubtarget &Subtarget, - const TargetInstrInfo &TII); + const MipsSEInstrInfo &TII); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index c5d6bf9811..7f5927d8ed 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -25,7 +25,8 @@ using namespace llvm; void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little) : + const std::string &FS, bool little, + Reloc::Model RM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), @@ -57,6 +58,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; + + // Set UseSmallSection. + UseSmallSection = !IsLinux && (RM == Reloc::Static); } bool diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ff89ff93a1..cc03587f61 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -65,6 +65,9 @@ protected: // isLinux - Target system is Linux. Is false we consider ELFOS for now. bool IsLinux; + // UseSmallSection - Small section is used. + bool UseSmallSection; + /// Features related to the presence of specific instructions. // HasSEInReg - SEB and SEH (signext in register) instructions. @@ -111,7 +114,7 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool little); + const std::string &FS, bool little, Reloc::Model RM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
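Aside on the NewImm plumbing above: when the caller passes a non-null NewImm, loadImmediate withholds the final ADDiu so that its 16-bit immediate can be folded into the memory instruction's displacement, as eliminateFI does. A sketch of the underlying split (editor's illustration; the real instruction sequence comes from MipsAnalyzeImmediate, this just shows the common LUi-plus-displacement case):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Offset = 0x12348000;  // too big for a signed 16-bit field
      int32_t Hi = ((Offset + 0x8000) >> 16) & 0xffff;  // materialized via LUi
      int32_t Lo = int16_t(Offset & 0xffff);            // the withheld "NewImm"
      // lui  $at, Hi         ; loadImmediate emits everything but the last ADDiu
      // addu $at, $fp, $at   ; eliminateFI adds the frame register
      // lw   $v0, Lo($at)    ; NewImm rides along as the displacement
      assert((Hi << 16) + Lo == Offset);
      return 0;
    }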
@@ -135,6 +138,7 @@ public: bool inMips16Mode() const { return InMips16Mode; } bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } + bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index bcc5ba97e4..515d26534a 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle), + Subtarget(TT, CPU, FS, isLittle, RM), DataLayout(isLittle ? (Subtarget.isABI_N64() ? "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index e91b2d811f..578dbd03f8 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -26,6 +26,7 @@ SSThreshold("mips-ssection-threshold", cl::Hidden, void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); SmallDataSection = getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, @@ -77,9 +78,10 @@ bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { - // Only use small section for non linux targets. const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); - if (Subtarget.isLinux()) + + // Return if small section is not available. + if (!Subtarget.useSmallSection()) return false; // @LOCALMOD-BEGIN diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index d175e3e79e..413142eb2b 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -137,7 +137,7 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { char Value = MI->getOperand(OpNo).getImm(); - Value = (Value << (32-5)) >> (32-5); + Value = SignExtend32<5>(Value); O << (int)Value; } diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index b7f1688436..cb15dadb7e 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E500mc", "">; +def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", + "PPC::DIR_E5500", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; @@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"e500mc", PPCE500mcModel, + [DirectiveE500mc, FeatureMFOCRF, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; +def : ProcessorModel<"e5500", 
PPCE5500Model, + [DirectiveE5500, FeatureMFOCRF, Feature64Bit, + FeatureSTFIWX, FeatureBookE, FeatureISEL]>; def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, FeatureISEL, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index f76b89c803..6e0e8bb8bc 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -109,6 +109,8 @@ namespace { bool doFinalization(Module &M); virtual void EmitFunctionEntryLabel(); + + void EmitFunctionBodyEnd(); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -345,23 +347,32 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitLabel(PICBase); return; } + case PPC::LDtocJTI: + case PPC::LDtocCPT: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - + // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); - assert(MO.isGlobal()); - - // Map symbol -> label of TOC entry. - MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())]; + + // Map symbol -> label of TOC entry + assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); + MCSymbol *MOSymbol = 0; + if (MO.isGlobal()) + MOSymbol = Mang->getSymbol(MO.getGlobal()); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + MCSymbol *&TOCEntry = TOC[MOSymbol]; if (TOCEntry == 0) TOCEntry = GetTempSymbol("C", TOCLabelID++); - + const MCExpr *Exp = - MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC, + MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); @@ -406,9 +417,9 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), - Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + 8/*size*/, 0/*addrspace*/); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -441,6 +452,23 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +/// EmitFunctionBodyEnd - Print the traceback table before the .size +/// directive. +/// +void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() { + // Only the 64-bit target requires a traceback table. For now, + // we only emit the word of zeroes that GDB requires to find + // the end of the function, and zeroes for the eight-byte + // mandatory fields. + // FIXME: We should fill in the eight-byte mandatory fields as described in + // the PPC64 ELF ABI (this is a low-priority item because GDB does not + // currently make use of these fields). 
+  if (Subtarget.isPPC64()) {
+    OutStreamer.EmitIntValue(0, 4/*size*/);
+    OutStreamer.EmitIntValue(0, 8/*size*/);
+  }
+}
+
 void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
   static const char *const CPUDirectives[] = {
     "",
@@ -453,6 +481,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
     "ppc750",
     "ppc970",
     "ppcA2",
+    "ppce500mc",
+    "ppce5500",
     "power6",
     "power7",
     "ppc64"
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index a00f686adc..e8f4d16997 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -975,6 +975,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
   case ISD::AND: {
     unsigned Imm, Imm2, SH, MB, ME;
+    uint64_t Imm64;
     // If this is an and of a value rotated between 0 and 31 bits and then and'd
     // with a mask, emit rlwinm
@@ -993,6 +994,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
       return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
     }
+    // If this is a 64-bit zero-extension mask, emit rldicl.
+    if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+        isMask_64(Imm64)) {
+      SDValue Val = N->getOperand(0);
+      MB = 64 - CountTrailingOnes_64(Imm64);
+      SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) };
+      return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+    }
     // AND X, 0 -> 0, not "rlwinm 32".
     if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
       ReplaceUses(SDValue(N, 0), N->getOperand(1));
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 61d44c52d4..dbb3b144a7 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setSchedulingPreference(Sched::Hybrid);
   computeRegisterProperties();
+
+  // The Freescale cores do better with aggressive inlining of memcpy and
+  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+    maxStoresPerMemset = 32;
+    maxStoresPerMemsetOptSize = 16;
+    maxStoresPerMemcpy = 32;
+    maxStoresPerMemcpyOptSize = 8;
+    maxStoresPerMemmove = 32;
+    maxStoresPerMemmoveOptSize = 8;
+
+    setPrefFunctionAlignment(4);
+    benefitFromCodePlacementOpt = true;
+  }
 }
 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -517,6 +532,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
   case PPCISD::MTFSF: return "PPCISD::MTFSF";
   case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+  case PPCISD::CR6SET: return "PPCISD::CR6SET";
+  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
   }
 }
@@ -811,14 +828,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
   }
   // Properly sign extend the value.
-  int ShAmt = (4-ByteSize)*8;
-  int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+  int MaskVal = SignExtend32(Value, ByteSize * 8);
   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
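Aside on the new rldicl pattern above: a 64-bit AND whose mask is a run of trailing ones is a rotate-by-0 followed by clearing the high MB bits, hence MB = 64 - CountTrailingOnes_64(Imm64). A quick model of that semantics (editor's sketch, covering only the SH == 0 case used in the pattern):

    #include <cassert>
    #include <cstdint>

    // rldicl rA, rS, SH, MB rotates left by SH, then keeps only the low
    // (64 - MB) bits of the result.
    uint64_t rldicl0(uint64_t Val, unsigned MB) {
      return MB == 0 ? Val : Val & (~uint64_t(0) >> MB);
    }

    int main() {
      const uint64_t Imm64 = 0xffffffffULL;    // isMask_64: 32 trailing ones
      const unsigned MB = 64 - 32;             // 64 - CountTrailingOnes_64(Imm64)
      const uint64_t Val = 0x0123456789abcdefULL;
      assert(rldicl0(Val, MB) == (Val & Imm64));
      return 0;
    }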
if (MaskVal == 0) return SDValue(); // Finally, if this value fits in a 5 bit sext field, return it - if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) + if (SignExtend32<5>(MaskVal) == MaskVal) return DAG.getTargetConstant(MaskVal, MVT::i32); return SDValue(); } @@ -1204,6 +1220,14 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue CPIHi = @@ -1217,6 +1241,14 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1441,7 +1473,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, false, 0); } @@ -2408,7 +2440,7 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. - (Addr << 6 >> 6) != Addr) + SignExtend32<26>(Addr) != Addr) return 0; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, @@ -2819,6 +2851,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, isTailCall, RegsToPass, Ops, NodeTys, PPCSubTarget); + // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls + if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); + // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. @@ -3116,14 +3152,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Set CR6 to true if this is a vararg call with floating args passed in - // registers. - if (isVarArg) { - SDValue SetCR(DAG.getMachineNode(seenFloatArg ? PPC::CRSET : PPC::CRUNSET, - dl, MVT::i32), 0); - RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR)); - } - // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
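Aside: this and several surrounding hunks replace shift-pair sign extensions such as (Value << (32-5)) >> (32-5) with SignExtendNN<B>. The shift pair left-shifts a signed value into the sign bit, which is undefined or implementation-defined behavior in C++; the helper avoids that. A well-defined equivalent (editor's sketch, not LLVM's actual implementation):

    #include <cassert>
    #include <cstdint>

    // Interpret the low B bits of X as a signed B-bit value.
    template <unsigned B> int32_t signExtend32(uint32_t X) {
      static_assert(B > 0 && B < 32, "field width");
      const uint32_t M = 1u << (B - 1);        // sign bit of the B-bit field
      const uint32_t F = X & ((1u << B) - 1);  // the field itself
      return int32_t(F ^ M) - int32_t(M);      // flip-and-subtract trick
    }

    int main() {
      assert(signExtend32<5>(0x1f) == -1);               // 5-bit all-ones is -1
      assert(signExtend32<5>(0x0f) == 15);
      assert(signExtend32<26>(1u << 25) == -(1 << 25));  // sign bit of a
      return 0;                                          // 26-bit displacement
    }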
SDValue InFlag; @@ -3133,6 +3161,18 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + // Set CR bit 6 to true if this is a vararg call with floating args passed in + // registers. + if (isVarArg) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, InFlag }; + + Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, + dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + + InFlag = Chain.getValue(1); + } + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); @@ -4126,7 +4166,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { + if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, @@ -4171,17 +4211,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { + if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { + if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b0a013b4b4..902b188da7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -174,6 +174,10 @@ namespace llvm { /// operand #3 optional in flag TC_RETURN, + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 39778a5dc1..cfe71d177e 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -29,6 +29,9 @@ def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; let EncoderMethod = "getLO16Encoding"; } +def tocentry : Operand<iPTR> { + let MIOperandInfo = (ops i32imm:$imm); +} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -296,12 +299,14 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; +} // Logical ops. 
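Aside on the CR6SET/CR6UNSET motivation above: under the 32-bit SVR4 ABI, the caller of a varargs function records in CR bit 6 whether any floating-point arguments were passed in registers, so the callee's va_start code knows whether the FP argument registers (f1-f8) must be dumped to the register save area. A hypothetical C illustration (editor's aside, not from the patch):

    #include <stdarg.h>

    double sum(int n, ...) {    // callee: va_start consults CR bit 6 to decide
      va_list ap;               // whether f1-f8 need spilling to the save area
      va_start(ap, n);
      double s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, double);
      va_end(ap);
      return s;
    }

    double caller(void) {
      return sum(2, 1.0, 2.0);  // FP args in registers, so the compiler emits
    }                           // "creqv 6, 6, 6" (CR6SET) before the call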
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), @@ -459,7 +464,7 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), - "sradi $rA, $rS, $SH", IntRotateD, + "sradi $rA, $rS, $SH", IntRotateDI, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), @@ -482,7 +487,7 @@ def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), let isCommutable = 1 in { def RLDIMI : MDForm_1<30, 3, (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldimi $rA, $rS, $SH, $MB", IntRotateD, + "rldimi $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -494,11 +499,11 @@ def RLDCL : MDForm_1<30, 0, []>, isPPC64; def RLDICL : MDForm_1<30, 0, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateD, + "rldicl $rA, $rS, $SH, $MB", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateD, + "rldicr $rA, $rS, $SH, $ME", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, @@ -541,19 +546,19 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLoad, + "lhau $rD, $disp($rA)", LdStLHAU, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLoad, + "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -584,31 +589,31 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. 
let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -624,6 +629,14 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; +def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; +def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, + (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. 
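Aside: this hunk and the ones that follow give the "update form" loads and stores (lbzu, lwzu, ldu, stwu, ...) their own itinerary classes. An update-form access produces two register results, the loaded value and the written-back effective address, so it generally costs more than the plain form. A small behavioral model (editor's sketch, big-endian, pre-increment semantics):

    #include <cassert>
    #include <cstdint>

    // lwzu rD, d(rA): EA = rA + d; rD = MEM[EA]; rA = EA.
    // Two results (rD and the updated rA), unlike plain lwz.
    uint32_t lwzu(uint32_t &rA, int16_t d, const uint8_t *mem) {
      rA += d;  // write-back of the effective address
      const uint8_t *p = mem + rA;
      return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
             (uint32_t(p[2]) << 8) | uint32_t(p[3]);  // big-endian word load
    }

    int main() {
      uint8_t mem[16] = {0};
      mem[4] = 0xde; mem[5] = 0xad; mem[6] = 0xbe; mem[7] = 0xef;
      uint32_t rA = 0;
      assert(lwzu(rA, 4, mem) == 0xdeadbeefu);
      assert(rA == 4);  // base register was updated
      return 0;
    }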
@@ -642,13 +655,13 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLD, + "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLoad, + "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -695,14 +708,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -710,7 +723,7 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, @@ -718,7 +731,7 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTD, + "stdu $rS, $ptroff($ptrreg)", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, @@ -727,7 +740,7 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -736,7 +749,7 @@ def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -745,7 +758,7 @@ def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -754,7 +767,7 @@ def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStStore, + "stdux $rS, $ptroff, $ptrreg", LdStSTDU, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 47f09dca77..d2df6645bb 100644 --- 
a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || + Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCScoreboardHazardRecognizer(II, DAG); } @@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. - if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && + Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f57f0c975a..a503908d8d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -123,9 +123,11 @@ def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + [SDNPHasChain, SDNPSideEffect, + SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -153,6 +155,12 @@ def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to set/unset CR bit 6 for SVR4 vararg calls +def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Instructions to support atomic operations def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, [SDNPHasChain, SDNPMayLoad]>; @@ -330,9 +338,6 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } -def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -673,7 +678,7 @@ def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFDU, + "lfs $rD, $src", LdStLFD, [(set F4RC:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, @@ -683,32 +688,32 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoad, + "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLoad, + "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoad, + "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoad, + "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfs $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lfd $rD, $addr", LdStLFD, + "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -716,37 +721,37 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoad, + "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLoad, + "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoad, + "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoad, + "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLoad, + "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLoad, + "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; } @@ -778,10 +783,10 @@ def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFDU, + "lfsx $frD, $src", LdStLFD, [(set F4RC:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFDU, + "lfdx $frD, $src", LdStLFD, [(set F8RC:$frD, (load xaddr:$src))]>; } @@ -801,10 +806,10 @@ def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), - "stfs $rS, $dst", LdStUX, + "stfs $rS, $dst", LdStSTFD, [(store F4RC:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), 
(ins F8RC:$rS, memri:$dst), - "stfd $rS, $dst", LdStUX, + "stfd $rS, $dst", LdStSTFD, [(store F8RC:$rS, iaddr:$dst)]>; } @@ -812,33 +817,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStore, + "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStore, + "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStore, + "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStStore, + "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStStore, + "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -863,7 +868,7 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStore, + "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -872,7 +877,7 @@ def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStore, + "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, @@ -881,7 +886,7 @@ def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStore, + "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -889,7 +894,7 @@ def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStStore, + "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -897,7 +902,7 @@ def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux 
$rS, $ptroff, $ptrreg", LdStStore, + "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, @@ -913,14 +918,14 @@ def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStUX, + "stfiwx $frS, $dst", LdStSTFD, [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStUX, + "stfsx $frS, $dst", LdStSTFD, [(store F4RC:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStUX, + "stfdx $frS, $dst", LdStSTFD, [(store F8RC:$frS, xaddr:$dst)]>; } @@ -964,7 +969,7 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; @@ -1143,6 +1148,16 @@ def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins), "crxor $dst, $dst, $dst", BrCR, []>; +let Defs = [CR1EQ], CRD = 6 in { +def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), + "creqv 6, 6, 6", BrCR, + [(PPCcr6set)]>; + +def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), + "crxor 6, 6, 6", BrCR, + [(PPCcr6unset)]>; +} + // XFX-Form instructions. Instructions that deal with SPRs. // let Uses = [CTR] in { @@ -1233,7 +1248,7 @@ let Uses = [RM] in { PPC970_DGroup_Single, PPC970_Unit_FPU; def FADDrtz: AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1364,7 +1379,7 @@ def FSELS : AForm_1<63, 23, let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPGeneral, + "fadd $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), @@ -1388,7 +1403,7 @@ let Uses = [RM] in { [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRB))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fsub $FRT, $FRA, $FRB", FPGeneral, + "fsub $FRT, $FRA, $FRB", FPAddSub, [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 6a6ccb9d98..660c0c3b63 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -40,6 +40,7 @@ def IntMulHWU : InstrItinClass; def IntMulLI : InstrItinClass; def IntRFID : InstrItinClass; def IntRotateD : InstrItinClass; +def IntRotateDI : InstrItinClass; def IntRotate : InstrItinClass; def IntShift : InstrItinClass; def IntTrapD : InstrItinClass; @@ -52,15 +53,18 @@ def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; def LdStLoad : InstrItinClass; +def LdStLoadUpd : InstrItinClass; def LdStStore : InstrItinClass; +def LdStStoreUpd : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; -def LdStUX : InstrItinClass; def LdStLD : InstrItinClass; +def LdStLDU : InstrItinClass; def LdStLDARX 
: InstrItinClass; def LdStLFD : InstrItinClass; def LdStLFDU : InstrItinClass; def LdStLHA : InstrItinClass; +def LdStLHAU : InstrItinClass; def LdStLMW : InstrItinClass; def LdStLVecX : InstrItinClass; def LdStLWA : InstrItinClass; @@ -69,6 +73,9 @@ def LdStSLBIA : InstrItinClass; def LdStSLBIE : InstrItinClass; def LdStSTD : InstrItinClass; def LdStSTDCX : InstrItinClass; +def LdStSTDU : InstrItinClass; +def LdStSTFD : InstrItinClass; +def LdStSTFDU : InstrItinClass; def LdStSTVEBX : InstrItinClass; def LdStSTWCX : InstrItinClass; def LdStSync : InstrItinClass; @@ -86,6 +93,7 @@ def SprMTSRIN : InstrItinClass; def SprRFI : InstrItinClass; def SprSC : InstrItinClass; def FPGeneral : InstrItinClass; +def FPAddSub : InstrItinClass; def FPCompare : InstrItinClass; def FPDivD : InstrItinClass; def FPDivS : InstrItinClass; @@ -110,6 +118,8 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleA2.td" +include "PPCScheduleE500mc.td" +include "PPCScheduleE5500.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -171,7 +181,7 @@ include "PPCScheduleA2.td" // extsh IntSimple // extsw IntSimple // fabs FPGeneral -// fadd FPGeneral +// fadd FPAddSub // fadds FPGeneral // fcfid FPGeneral // fcmpo FPCompare @@ -201,35 +211,35 @@ include "PPCScheduleA2.td" // fsel FPGeneral // fsqrt FPSqrt // fsqrts FPSqrt -// fsub FPGeneral +// fsub FPAddSub // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC // lbz LdStLoad -// lbzu LdStLoad -// lbzux LdStUX +// lbzu LdStLoadUpd +// lbzux LdStLoadUpd // lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX -// ldu LdStLD -// ldux LdStLD +// ldu LdStLDU +// ldux LdStLDU // ldx LdStLD // lfd LdStLFD // lfdu LdStLFDU // lfdux LdStLFDU -// lfdx LdStLFDU -// lfs LdStLFDU +// lfdx LdStLFD +// lfs LdStLFD // lfsu LdStLFDU // lfsux LdStLFDU -// lfsx LdStLFDU +// lfsx LdStLFD // lha LdStLHA -// lhau LdStLHA -// lhaux LdStLHA +// lhau LdStLHAU +// lhaux LdStLHAU // lhax LdStLHA // lhbrx LdStLoad // lhz LdStLoad -// lhzu LdStLoad -// lhzux LdStUX +// lhzu LdStLoadUpd +// lhzux LdStLoadUpd // lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW @@ -243,12 +253,12 @@ include "PPCScheduleA2.td" // lvxl LdStLVecX // lwa LdStLWA // lwarx LdStLWARX -// lwaux LdStLHA +// lwaux LdStLHAU // lwax LdStLHA // lwbrx LdStLoad // lwz LdStLoad -// lwzu LdStLoad -// lwzux LdStUX +// lwzu LdStLoadUpd +// lwzux LdStLoadUpd // lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral @@ -292,10 +302,10 @@ include "PPCScheduleA2.td" // rfid IntRFID // rldcl IntRotateD // rldcr IntRotateD -// rldic IntRotateD -// rldicl IntRotateD -// rldicr IntRotateD -// rldimi IntRotateD +// rldic IntRotateDI +// rldicl IntRotateDI +// rldicr IntRotateDI +// rldimi IntRotateDI // rlwimi IntRotate // rlwinm IntGeneral // rlwnm IntGeneral @@ -305,33 +315,33 @@ include "PPCScheduleA2.td" // sld IntRotateD // slw IntGeneral // srad IntRotateD -// sradi IntRotateD +// sradi IntRotateDI // sraw IntShift // srawi IntShift // srd IntRotateD // srw IntGeneral // stb LdStStore -// stbu LdStStore -// stbux LdStStore +// stbu LdStStoreUpd +// stbux LdStStoreUpd // stbx LdStStore // std LdStSTD // stdcx. 
LdStSTDCX -// stdu LdStSTD -// stdux LdStSTD +// stdu LdStSTDU +// stdux LdStSTDU // stdx LdStSTD -// stfd LdStUX -// stfdu LdStUX -// stfdux LdStUX -// stfdx LdStUX -// stfiwx LdStUX -// stfs LdStUX -// stfsu LdStUX -// stfsux LdStUX -// stfsx LdStUX +// stfd LdStSTFD +// stfdu LdStSTFDU +// stfdux LdStSTFDU +// stfdx LdStSTFD +// stfiwx LdStSTFD +// stfs LdStSTFD +// stfsu LdStSTFDU +// stfsux LdStSTFDU +// stfsx LdStSTFD // sth LdStStore // sthbrx LdStStore -// sthu LdStStore -// sthux LdStStore +// sthu LdStStoreUpd +// sthux LdStStoreUpd // sthx LdStStore // stmw LdStLMW // stswi LdStLMW @@ -344,8 +354,8 @@ include "PPCScheduleA2.td" // stw LdStStore // stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStStore -// stwux LdStStore +// stwu LdStStoreUpd +// stwux LdStStoreUpd // stwx LdStStore // subf IntGeneral // subfc IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index cd0fb70a24..37b6eac10c 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -288,6 +288,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [9, 5], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -297,6 +306,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -306,7 +324,7 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStSTFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, @@ -315,6 +333,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5, 5], [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -342,6 +369,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -371,6 +407,15 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<2, [LWB]>], [8, 5], [NoBypass, 
GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, @@ -537,6 +582,19 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [FWB]>], [10, 4, 4], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 4d4a5d0e1b..ba63b5cd8f 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -181,6 +181,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData<IntShift , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -302,7 +313,18 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, - InstrItinData<LdStLD , [InstrStage<4, + InstrItinData<LdStLoadUpd , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -324,6 +346,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -335,7 +368,7 @@ def PPCA2Itineraries : 
ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStUX , [InstrStage<4, + InstrItinData<LdStSTFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7]>, @@ -346,6 +379,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7, 7], [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<LdStLFD , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,6 +423,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStLMW , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -412,6 +467,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [13, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData<LdStSTDCX , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -593,6 +659,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], [15, 7, 7], [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, InstrItinData<FPCompare , [InstrStage<4, [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td new file mode 100644 index 0000000000..9bb779a0e6 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE500mc.td @@ -0,0 +1,265 @@ +//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===// +// +// The LLVM Compiler 
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e500mc 32-bit +// Power processor. +// +// All information is derived from the "e500mc Core Reference Manual", +// Freescale Document Number E500MCRM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e500mc core: +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +def DIS0 : FuncUnit; // Dispatch stage - insn 1 +def DIS1 : FuncUnit; // Dispatch stage - insn 2 + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// Some instructions can only execute in SFX0 but not SFX1. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is executed. +def SFX0 : FuncUnit; // Simple unit 0 +def SFX1 : FuncUnit; // Simple unit 1 +def BU : FuncUnit; // Branch unit +def CFX_DivBypass + : FuncUnit; // CFX divide bypass path +def CFX_0 : FuncUnit; // CFX pipeline +def LSU_0 : FuncUnit; // LSU pipeline +def FPU_0 : FuncUnit; // FPU pipeline + +def PPCE500mcItineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<14, [CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11], // Latency = 8 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<8, [FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, 
GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [4, 1], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1, 1], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1, 1], // Latency = 4 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], + [GPR_Bypass, GPR_Bypass]>, + 
InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [5, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [SFX0]>], + [8, 1], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0]>], + [4, 1], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<68, [FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<38, [FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e500mc machine model for scheduling and other instruction cost heuristics. + +def PPCE500mcModel : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 5; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE500mcItineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td new file mode 100644 index 0000000000..d7e11acd9f --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleE5500.td @@ -0,0 +1,309 @@ +//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Freescale e5500 64-bit +// Power processor. +// +// All information is derived from the "e5500 Core Reference Manual", +// Freescale Document Number e5500RM, Rev. 1, 03/2012. +// +//===----------------------------------------------------------------------===// +// Relevant functional units in the Freescale e5500 core +// (These are the same as for the e500mc) +// +// * Decode & Dispatch +// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue +// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ). +// def DIS0 : FuncUnit; +// def DIS1 : FuncUnit; + +// * Execute +// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX. +// The CFX has a bypass path, allowing non-divide instructions to execute +// while a divide instruction is being executed. +// def SFX0 : FuncUnit; // Simple unit 0 +// def SFX1 : FuncUnit; // Simple unit 1 +// def BU : FuncUnit; // Branch unit +// def CFX_DivBypass +// : FuncUnit; // CFX divide bypass path +// def CFX_0 : FuncUnit; // CFX pipeline stage 0 + +def CFX_1 : FuncUnit; // CFX pipeline stage 1 + +// def LSU_0 : FuncUnit; // LSU pipeline +// def FPU_0 : FuncUnit; // FPU pipeline + + +def PPCE5500Itineraries : ProcessorItineraries< + [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1, + LSU_0, FPU_0], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<26, [CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<16, [CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [FPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<7, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<1, [CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0], 0>, + InstrStage<2, [CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + 
InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0, SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [BU]>], + [5, 2], // Latency = 1 + [CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5, 2, 2], // Latency = 1 + [CR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 
0>, + InstrStage<1, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [FPR_Bypass, GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [GPR_Bypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<3, [LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0]>]>, + InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [LSU_0], 0>]>, + InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<5, [CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<4, [CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [SFX0, SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + 
InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<31, [FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<16, [FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<1, [FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>, + InstrStage<2, [FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [FPR_Bypass, FPR_Bypass]> +]>; + +// ===---------------------------------------------------------------------===// +// e5500 machine model for scheduling and other instruction cost heuristics. + +def PPCE5500Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + + let Itineraries = PPCE5500Itineraries; +} diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 61e89ed32c..72a0a39263 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -34,12 +34,16 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<8, [SLU]>]>, @@ -58,6 +62,7 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index e19ddfa80e..fc9120dfa2 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -33,13 +33,17 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<2, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<2, 
[SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<34, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, @@ -60,6 +64,7 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<2, [SRU]>]>, InstrItinData<SprSC , [InstrStage<2, [SRU]>]>, InstrItinData<FPGeneral , [InstrStage<1, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<1, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<1, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<31, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<17, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index e7446cb028..a4e82ce23e 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -36,19 +36,24 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, - InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<37, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWARX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSync , [InstrStage<35, [SLU]>]>, @@ -66,6 +71,7 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<SprRFI , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<SprSC , [InstrStage<0, [IU1, IU2, IU3, IU4]>]>, InstrItinData<FPGeneral , [InstrStage<5, [FPU1]>]>, + InstrItinData<FPAddSub , [InstrStage<5, [FPU1]>]>, InstrItinData<FPCompare , [InstrStage<5, [FPU1]>]>, InstrItinData<FPDivD , [InstrStage<35, [FPU1]>]>, InstrItinData<FPDivS , [InstrStage<21, [FPU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 1371499726..7c02ea099c 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -27,6 +27,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<IntMulLI , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntRFID , [InstrStage<1, [IU2]>]>, InstrItinData<IntRotateD , [InstrStage<2, [IU1, IU2]>]>, + InstrItinData<IntRotateDI , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntRotate , [InstrStage<4, [IU1, IU2]>]>, InstrItinData<IntShift , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntTrapD , [InstrStage<1, [IU1, IU2]>]>, @@ -37,15 +38,20 @@ def G5Itineraries : ProcessorItineraries< 
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoadUpd , [InstrStage<3, [SLU]>]>, InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStoreUpd, [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, - InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFD , [InstrStage<4, [SLU]>]>, + InstrItinData<LdStSTFDU , [InstrStage<4, [SLU]>]>, InstrItinData<LdStLD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLDARX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLFDU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLHA , [InstrStage<5, [SLU]>]>, + InstrItinData<LdStLHAU , [InstrStage<5, [SLU]>]>, InstrItinData<LdStLMW , [InstrStage<64, [SLU]>]>, InstrItinData<LdStLVecX , [InstrStage<3, [SLU]>]>, InstrItinData<LdStLWA , [InstrStage<5, [SLU]>]>, @@ -53,6 +59,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<LdStSLBIA , [InstrStage<40, [SLU]>]>, // needs work InstrItinData<LdStSLBIE , [InstrStage<2, [SLU]>]>, InstrItinData<LdStSTD , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStSTDU , [InstrStage<3, [SLU]>]>, InstrItinData<LdStSTDCX , [InstrStage<11, [SLU]>]>, InstrItinData<LdStSTVEBX , [InstrStage<5, [SLU]>]>, InstrItinData<LdStSTWCX , [InstrStage<11, [SLU]>]>, @@ -69,6 +76,7 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<SprMTSPR , [InstrStage<8, [IU2]>]>, InstrItinData<SprSC , [InstrStage<1, [IU2]>]>, InstrItinData<FPGeneral , [InstrStage<6, [FPU1, FPU2]>]>, + InstrItinData<FPAddSub , [InstrStage<6, [FPU1, FPU2]>]>, InstrItinData<FPCompare , [InstrStage<8, [FPU1, FPU2]>]>, InstrItinData<FPDivD , [InstrStage<33, [FPU1, FPU2]>]>, InstrItinData<FPDivS , [InstrStage<33, [FPU1, FPU2]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 0207c83393..b8b1614e62 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -41,6 +41,8 @@ namespace PPC { DIR_750, DIR_970, DIR_A2, + DIR_E500mc, + DIR_E5500, DIR_PWR6, DIR_PWR7, DIR_64 diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 15541ef2f8..e64c140e49 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -129,7 +129,7 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, - [SDNPHasChain]>; + [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>; def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 924b1ed80d..62f973e658 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,6 +24,16 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "_ZdaPv", + "_ZdlPv", + "_Znaj", + "_ZnajRKSt9nothrow_t", + "_Znam", + "_ZnamRKSt9nothrow_t", + "_Znwj", + "_ZnwjRKSt9nothrow_t", + "_Znwm", + "_ZnwmRKSt9nothrow_t", "__cxa_atexit", "__cxa_guard_abort", "__cxa_guard_acquire", @@ -50,6 +60,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "atanhf", "atanhl", "atanl", + "calloc", "cbrt", "cbrtf", "cbrtl", @@ -89,6 +100,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "fmodl", "fputc", 
"fputs", + "free", "fwrite", "iprintf", "log", @@ -106,6 +118,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "logbl", "logf", "logl", + "malloc", "memchr", "memcmp", "memcpy", @@ -115,11 +128,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "nearbyint", "nearbyintf", "nearbyintl", + "posix_memalign", "pow", "powf", "powl", "putchar", "puts", + "realloc", + "reallocf", "rint", "rintf", "rintl", @@ -139,10 +155,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "strcat", "strchr", "strcpy", + "strdup", "strlen", "strncat", "strncmp", "strncpy", + "strndup", "strnlen", "tan", "tanf", @@ -152,7 +170,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "tanl", "trunc", "truncf", - "truncl" + "truncl", + "valloc" }; /// initialize - Initialize the set of available library functions based on the diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 73a00950ac..2d787b6a9a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -67,12 +67,19 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); - bool MatchInstruction(SMLoc IDLoc, + bool MatchInstruction(SMLoc IDLoc, unsigned &Kind, SmallVectorImpl<MCParsedAsmOperand*> &Operands, SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, bool matchingInlineAsm = false); + unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned OperandNum, unsigned &NumMCOperands) { + return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, + NumMCOperands); + } + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -514,12 +521,13 @@ bool X86AsmParser::isDstOp(X86Operand &Op) { bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { RegNo = 0; - if (!isParsingIntelSyntax()) { - const AsmToken &TokPercent = Parser.getTok(); - assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); - StartLoc = TokPercent.getLoc(); + const AsmToken &PercentTok = Parser.getTok(); + StartLoc = PercentTok.getLoc(); + + // If we encounter a %, ignore it. This code handles registers with and + // without the prefix, unprefixed registers can occur in cfi directives. + if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) Parser.Lex(); // Eat percent token. - } const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { @@ -1516,9 +1524,12 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { - SmallVector<MCInst, 2> Insts; + unsigned Kind; unsigned ErrorInfo; - bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo); + SmallVector<MCInst, 2> Insts; + + bool Error = MatchInstruction(IDLoc, Kind, Operands, Insts, + ErrorInfo); if (!Error) for (unsigned i = 0, e = Insts.size(); i != e; ++i) Out.EmitInstruction(Insts[i]); @@ -1526,7 +1537,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool X86AsmParser:: -MatchInstruction(SMLoc IDLoc, +MatchInstruction(SMLoc IDLoc, unsigned &Kind, SmallVectorImpl<MCParsedAsmOperand*> &Operands, SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, bool matchingInlineAsm) { @@ -1537,7 +1548,7 @@ MatchInstruction(SMLoc IDLoc, // First, handle aliases that expand to multiple instructions. 
// FIXME: This should be replaced with a real .td file alias mechanism. - // Also, MatchInstructionImpl should do actually *do* the EmitInstruction + // Also, MatchInstructionImpl should actually *do* the EmitInstruction // call. if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || @@ -1568,7 +1579,7 @@ MatchInstruction(SMLoc IDLoc, MCInst Inst; // First, try a direct match. - switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo, + switch (MatchInstructionImpl(Operands, Kind, Inst, OrigErrorInfo, isParsingIntelSyntax())) { default: break; case Match_Success: @@ -1585,9 +1596,6 @@ MatchInstruction(SMLoc IDLoc, Error(IDLoc, "instruction requires a CPU feature not currently enabled", EmptyRanges, matchingInlineAsm); return true; - case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction", - EmptyRanges, matchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1619,14 +1627,19 @@ MatchInstruction(SMLoc IDLoc, Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; + unsigned tKind; - Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match1 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match2 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match3 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + if (Match4 == Match_Success) Kind = tKind; // Restore the old token. Op->setTokenValue(Base); @@ -1677,8 +1690,10 @@ MatchInstruction(SMLoc IDLoc, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { + ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges : + Op->getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange(), matchingInlineAsm); + Ranges, matchingInlineAsm); } // Recover location info for the operand if we know which was the problem. diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 5039887e1a..f13692739a 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -44,7 +44,7 @@ void x86DisassemblerDebug(const char *file, dbgs() << file << ":" << line << ": " << s; } -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) { +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); return MII->getName(Opcode); } @@ -95,8 +95,8 @@ const EDInstInfo *X86GenericDisassembler::getEDInfo() const { /// be a pointer to a MemoryObject. /// @param byte - A pointer to the byte to be read. /// @param address - The address to be read. 
-static int regionReader(void* arg, uint8_t* byte, uint64_t address) { - MemoryObject* region = static_cast<MemoryObject*>(arg); +static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { + const MemoryObject* region = static_cast<const MemoryObject*>(arg); return region->readByte(address, byte); } @@ -135,10 +135,10 @@ X86GenericDisassembler::getInstruction(MCInst &instr, int ret = decodeInstruction(&internalInstr, regionReader, - (void*)&region, + (const void*)&region, loggerFn, (void*)&vStream, - (void*)MII, + (const void*)MII, address, fMode); @@ -379,6 +379,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } switch (type) { + case TYPE_XMM32: + case TYPE_XMM64: case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); return; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 0c929122ae..af444d196e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -200,7 +200,7 @@ static void unconsumeByte(struct InternalInstruction* insn) { insn->readerCursor + offset); \ if (ret) \ return ret; \ - combined = combined | ((type)byte << ((type)offset * 8)); \ + combined = combined | ((uint64_t)byte << (offset * 8)); \ } \ *ptr = combined; \ insn->readerCursor += sizeof(type); \ @@ -719,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ -static int getID(struct InternalInstruction* insn, void *miiArg) { +static int getID(struct InternalInstruction* insn, const void *miiArg) { uint8_t attrMask; uint16_t instructionID; @@ -1621,10 +1621,10 @@ static int readOperands(struct InternalInstruction* insn) { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 797703f803..05cbb4c597 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -403,7 +403,7 @@ typedef uint8_t BOOL; * be read from. * @return - -1 if the byte cannot be read for any reason; 0 otherwise. 
*/ -typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); +typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); /* * dlog_t - Type for the logging function that the consumer can provide to @@ -422,7 +422,7 @@ struct InternalInstruction { /* Reader interface (C) */ byteReader_t reader; /* Opaque value passed to the reader */ - void* readerArg; + const void* readerArg; /* The address of the next byte to read via the reader */ uint64_t readerCursor; @@ -561,10 +561,10 @@ struct InternalInstruction { */ int decodeInstruction(struct InternalInstruction* insn, byteReader_t reader, - void* readerArg, + const void* readerArg, dlog_t logger, void* loggerArg, - void* miiArg, + const void* miiArg, uint64_t startLoc, DisassemblerMode mode); @@ -579,7 +579,7 @@ void x86DisassemblerDebug(const char *file, unsigned line, const char *s); -const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); +const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); #ifdef __cplusplus } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 624e56fa0f..40110353fc 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -941,3 +941,15 @@ and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the cost of reduced accuracy. //===---------------------------------------------------------------------===// + +This function should be matched to haddpd when the appropriate CPU is enabled: + +#include <x86intrin.h> +double f (__m128d p) { + return p[0] + p[1]; +} + +similarly, v[0]-v[1] should match to hsubpd, and {v[0]-v[1], w[0]-w[1]} should +turn into hsubpd also. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 18e6b7c3d9..d078a7b5df 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,6 +120,9 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; +def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; //===----------------------------------------------------------------------===// // X86 processors supported. 
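The new FeatureSlowDivide flag (attached to the Atom definition in the next hunk, and wired to addBypassSlowDivType in X86ISelLowering.cpp further below) describes the transformation sketched here. This is a minimal C sketch of the idea only; the function name is made up for illustration, and the real transform is emitted as IR by the bypass pass rather than written by hand:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the idiv-to-divb bypass: when both operands fit in 8 bits,
   the cheap 8-bit divide (divb) yields the same quotient as the full
   32-bit divide (divl). div_with_bypass is a hypothetical name. */
static uint32_t div_with_bypass(uint32_t a, uint32_t b) {
  if (((a | b) & 0xFFFFFF00u) == 0)   /* both operands < 256 */
    return (uint32_t)((uint8_t)a / (uint8_t)b);
  return a / b;
}

int main(void) {
  printf("%u %u\n", div_with_bypass(250, 5), div_with_bypass(1000, 5));
  return 0;
}
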
@@ -160,7 +163,8 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>; + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 3280102c2d..8448556720 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" @@ -135,8 +134,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DEBUG(dbgs() << "JITTing function '" - << MF.getFunction()->getName() << "'\n"); + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 5e15946d0f..23450f761c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2050,13 +2050,17 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { MVT VT; if (!isTypeLegal(C->getType(), VT)) - return false; + return 0; + + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return 0; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; + default: return 0; case MVT::i8: Opc = X86::MOV8rm; RC = &X86::GR8RegClass; @@ -2094,7 +2098,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { break; case MVT::f80: // No f80 support yet. - return false; + return 0; } // Materialize addresses with LEA instructions. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 955c75aa56..9d5de814be 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -171,6 +171,7 @@ namespace { // Shuffle live registers to match the expectations of successor blocks. void finishBlockStack(); +#ifndef NDEBUG void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -181,6 +182,7 @@ namespace { dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]); dbgs() << "\n"; } +#endif /// getSlot - Return the stack slot number a particular register number is /// in. 
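In the X86ISelDAGToDAG.cpp diff that follows, the round-trip test ((Val >> ShlVal) << ShlVal) != Val is replaced by masking the bits the shift would discard. For unsigned values the two tests agree; this standalone C check (an illustration, not part of the patch) exercises the identity:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int main(void) {
  uint64_t vals[] = {0, 1, 0xF0, 0xF1, 0xDEADBEEF};
  for (size_t i = 0; i < sizeof(vals) / sizeof(vals[0]); ++i) {
    for (unsigned s = 0; s < 16; ++s) {
      uint64_t v = vals[i];
      uint64_t mask = (1ULL << s) - 1;  /* the patch's RemovedBitsMask */
      /* Both sides ask: does shifting v right by s drop any set bits? */
      assert((((v >> s) << s) != v) == ((v & mask) != 0));
    }
  }
  return 0;
}
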
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index cdb7648751..6c0369f70a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -101,6 +101,7 @@ namespace { Base_Reg = Reg; } +#ifndef NDEBUG void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; @@ -134,6 +135,7 @@ namespace { dbgs() << "nul"; dbgs() << " JT" << JT << " Align" << Align << '\n'; } +#endif }; } @@ -1038,7 +1040,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); - uint64_t Disp = AddVal->getSExtValue() << Val; + uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; if (!FoldOffsetIntoAddress(Disp, AM)) return false; } @@ -2329,7 +2331,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Make sure that we don't change the operation by removing bits. // This only matters for OR and XOR, AND is unaffected. - if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) + uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1; + if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; unsigned ShlOp, Op; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index fe853cba05..f7492b8291 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -85,7 +85,7 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -118,7 +118,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) * ElemsPerChunk); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -190,6 +190,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); + // Bypass i32 with i8 on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext())); + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. 
setLibcallName(RTLIB::SDIV_I64, "_alldiv"); @@ -1063,7 +1067,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - if (Subtarget->hasFMA()) { + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Custom); setOperationAction(ISD::FMA, MVT::v4f64, Custom); setOperationAction(ISD::FMA, MVT::v4f32, Custom); @@ -2875,7 +2879,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + ((const X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; @@ -3550,25 +3554,26 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) MatchOddMask = false; } - static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; - static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; - const int *CompactionMask; - if (MatchEvenMask) - CompactionMask = CompactionMaskEven; - else if (MatchOddMask) - CompactionMask = CompactionMaskOdd; - else + if (!MatchEvenMask && !MatchOddMask) return SDValue(); - + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); - SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), - UndefNode, CompactionMask); - SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), - UndefNode, CompactionMask); - static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; - return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); + SDValue Op0 = SVOp->getOperand(0); + SDValue Op1 = SVOp->getOperand(1); + + if (MatchEvenMask) { + // Shift the second operand right to 32 bits. + static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 }; + Op1 = DAG.getVectorShuffle(VT, dl, Op1, UndefNode, ShiftRightMask); + } else { + // Shift the first operand left to 32 bits. + static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 }; + Op0 = DAG.getVectorShuffle(VT, dl, Op0, UndefNode, ShiftLeftMask); + } + static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, BlendMask); } /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand @@ -5021,6 +5026,18 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); + + // Make sure the newly-created LOAD is in the same position as LDBase in + // terms of dependency. We create a TokenFactor for LDBase and ResNode, and + // update uses of LDBase's output chain to use the TokenFactor. 
+ if (LDBase->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), + SDValue(ResNode.getNode(), 1)); + } + return DAG.getNode(ISD::BITCAST, DL, VT, ResNode); } return SDValue(); @@ -5925,8 +5942,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef<int> MaskVals = SVOp->getMask(); - bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; - // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. @@ -5950,7 +5965,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (V2IsUndef) + + // As PSHUFB will zero elements with negative indices, it's safe to ignore + // the 2nd operand if it's undefined or zero. + if (V2.getOpcode() == ISD::UNDEF || + ISD::isBuildVectorAllZeros(V2.getNode())) return V1; // Calculate the shuffle mask for the second input, shuffle it, and @@ -6036,6 +6055,40 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV); } +// v32i8 shuffles - Translate to VPSHUFB if possible. +static +SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG, + const X86TargetLowering &TLI) { + EVT VT = SVOp->getValueType(0); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + ArrayRef<int> MaskVals = SVOp->getMask(); + + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + + if (VT != MVT::v32i8 || !TLI.getSubtarget()->hasAVX2() || !V2IsUndef) + return SDValue(); + + SmallVector<SDValue,32> pshufbMask; + for (unsigned i = 0; i != 32; i++) { + int EltIdx = MaskVals[i]; + if (EltIdx < 0 || EltIdx >= 32) + EltIdx = 0x80; + else { + if ((EltIdx >= 16 && i < 16) || (EltIdx < 16 && i >= 16)) + // Cross lane is not allowed. + return SDValue(); + EltIdx &= 0xf; + } + pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); + } + return DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, dl, + MVT::v32i8, &pshufbMask[0], 32)); +} + /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide /// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in @@ -6862,6 +6915,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } + if (VT == MVT::v32i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, DAG, *this); + if (NewOp.getNode()) + return NewOp; + } + // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. if (NumElems == 4 && VT.is128BitVector()) @@ -10108,62 +10167,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1), Op.getOperand(2), DAG); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. 
- case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = Op.getOperand(2); - if (isa<ConstantSDNode>(ShAmt)) - return SDValue(); - - unsigned NewIntNo; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, - DAG.getConstant(0, MVT::i32)); -// FIXME this must be lowered to get rid of the invalid type. - - EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(NewIntNo, MVT::i32), - Op.getOperand(1), ShAmt); - } case Intrinsic::x86_sse42_pcmpistria128: case Intrinsic::x86_sse42_pcmpestria128: case Intrinsic::x86_sse42_pcmpistric128: @@ -10242,6 +10245,74 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); } + case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + Opc = X86ISD::FMADD; + break; + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + Opc = X86ISD::FMSUB; + break; + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + Opc = X86ISD::FNMADD; + break; + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + Opc = X86ISD::FNMSUB; + break; + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + Opc = X86ISD::FMADDSUB; + break; + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: + Opc = X86ISD::FMSUBADD; + break; + } + + return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } } } @@ -14218,7 +14289,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // // where Op could be BRCOND or CMOV. // -static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { +static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if not CMP and SUB with its value result used. if (Cmp.getOpcode() != X86ISD::CMP && (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) return SDValue(); @@ -14269,25 +14340,88 @@ static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { return SetCC.getOperand(1); } -static bool IsValidFCMOVCondition(X86::CondCode CC) { - switch (CC) { - default: - return false; - case X86::COND_B: - case X86::COND_BE: - case X86::COND_E: - case X86::COND_P: - case X86::COND_AE: - case X86::COND_A: - case X86::COND_NE: - case X86::COND_NP: - return true; +/// checkFlaggedOrCombine - DAG combination on X86ISD::OR, i.e. with EFLAGS +/// updated. If only the flag result is used and the result is evaluated from +/// a series of element extractions, try to combine it into a PTEST. +static SDValue checkFlaggedOrCombine(SDValue Or, X86::CondCode &CC, + SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDNode *N = Or.getNode(); + DebugLoc DL = N->getDebugLoc(); + + // Only SSE4.1 and beyond support PTEST and the like. + if (!Subtarget->hasSSE41()) + return SDValue(); + + if (N->getOpcode() != X86ISD::OR) + return SDValue(); + + // Quit if the value result of OR is used. + if (N->hasAnyUseOfValue(0)) + return SDValue(); + + // Quit if not used as a boolean value. + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + SmallVector<SDValue, 8> Opnds; + SDValue VecIn; + EVT VT = MVT::Other; + unsigned Mask = 0; + + // Recognize a special case where a vector is cast into a wide integer to + // test all 0s. + Opnds.push_back(N->getOperand(0)); + Opnds.push_back(N->getOperand(1)); + + for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { + SmallVector<SDValue, 8>::const_iterator I = Opnds.begin() + Slot; + // BFS traverse all OR'd operands. + if (I->getOpcode() == ISD::OR) { + Opnds.push_back(I->getOperand(0)); + Opnds.push_back(I->getOperand(1)); + // Re-evaluate the number of nodes to be traversed.
+ e += 2; // 2 more nodes (LHS and RHS) are pushed. + continue; + } + + // Quit if a non-EXTRACT_VECTOR_ELT is found. + if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // Quit if the index is not a constant. + SDValue Idx = I->getOperand(1); + if (!isa<ConstantSDNode>(Idx)) + return SDValue(); + + // Check if all elements are extracted from the same vector. + SDValue ExtractedFromVec = I->getOperand(0); + if (VecIn.getNode() == 0) { + VT = ExtractedFromVec.getValueType(); + // FIXME: only 128-bit vector is supported so far. + if (!VT.is128BitVector()) + return SDValue(); + VecIn = ExtractedFromVec; + } else if (VecIn != ExtractedFromVec) + return SDValue(); + + // Record the constant index. + Mask |= 1U << cast<ConstantSDNode>(Idx)->getZExtValue(); } + + assert(VT.is128BitVector() && "Only 128-bit vector PTEST is supported so far."); + + // Quit if not all elements are used. + if (Mask != (1U << VT.getVectorNumElements()) - 1U) + return SDValue(); + + return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIn, VecIn); } /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); // If the flag operand isn't dead, don't touch this CMOV. @@ -14312,10 +14446,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue Flags; - Flags = BoolTestSetCCCombine(Cond, CC); + Flags = checkBoolTestSetCCCombine(Cond, CC); if (Flags.getNode() && // Extra check as FCMOV only supports a subset of X86 cond. - (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) { + (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { + SDValue Ops[] = { FalseOp, TrueOp, + DAG.getConstant(CC, MVT::i8), Flags }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), + Ops, array_lengthof(Ops)); + } + + Flags = checkFlaggedOrCombine(Cond, CC, DAG, Subtarget); + if (Flags.getNode()) { SDValue Ops[] = { FalseOp, TrueOp, DAG.getConstant(CC, MVT::i8), Flags }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), @@ -15588,7 +15730,7 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes - // into FMINC and MMAXC, which are Commutative operations. + // into FMINC and FMAXC, which are Commutative operations. unsigned NewOp = 0; switch (N->getOpcode()) { default: llvm_unreachable("unknown opcode"); @@ -15706,8 +15848,13 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || + (!Subtarget->hasFMA() && !Subtarget->hasFMA4())) return SDValue(); SDValue A = N->getOperand(0); @@ -15729,9 +15876,10 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode; if (!NegMul) - Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB; else - Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + Opcode = (!NegC) ?
X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); } @@ -15829,7 +15977,9 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { DebugLoc DL = N->getDebugLoc(); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); @@ -15845,7 +15995,13 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); @@ -15867,7 +16023,14 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, SDValue Flags; - Flags = BoolTestSetCCCombine(EFLAGS, CC); + Flags = checkBoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + + Flags = checkFlaggedOrCombine(EFLAGS, CC, DAG, Subtarget); if (Flags.getNode()) { SDValue Cond = DAG.getConstant(CC, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, @@ -16062,7 +16225,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); - case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); @@ -16092,7 +16255,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); - case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index bb6bb3adfa..0815e9b5b1 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -16,15 +16,18 @@ // // Return instructions. +// +// The X86retflag return instructions are variadic because we may add ST0 and +// ST1 arguments when returning values on the x87 stack. 
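The new checkFlaggedOrCombine above, restated at source level (a sketch; the helper names are mine): a tree of OR'd EXTRACT_VECTOR_ELTs that covers every element and feeds only an equal/not-equal-to-zero test is the scalarized form of "is this vector all zeros", and one SSE4.1 PTEST of the vector against itself answers that directly.

#include <immintrin.h>
#include <cstdint>

static bool anyBitSetExtracts(__m128i V) {
  // The shape before the combine: every lane extracted and OR'd together.
  uint32_t Acc = (uint32_t)_mm_extract_epi32(V, 0) |
                 (uint32_t)_mm_extract_epi32(V, 1) |
                 (uint32_t)_mm_extract_epi32(V, 2) |
                 (uint32_t)_mm_extract_epi32(V, 3);
  return Acc != 0;
}

static bool anyBitSetPtest(__m128i V) {
  // The shape after the combine: PTEST sets ZF iff (V & V) is all zeros.
  return !_mm_testz_si128(V, V);
}

Note that the combine quits unless the recorded index mask covers all elements, matching the requirement that the scalar OR really tests the whole vector.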
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins), + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; def RETW : I <0xC3, RawFrm, (outs), (ins), "ret{w}", [], IIC_RET>, OpSize; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 95ee7e50ba..56638002d8 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -19,7 +19,8 @@ let Constraints = "$src1 = $dst" in { multiclass fma3p_rm<bits<8> opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, - SDPatternOperator Op = null_frag, bit MayLoad = 1> { + SDPatternOperator Op = null_frag> { + let isCommutable = 1 in def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -27,7 +28,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, @@ -35,6 +36,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; + let isCommutable = 1 in def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, @@ -42,7 +44,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr, [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, @@ -59,7 +61,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy)), - MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>; + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy)), @@ -112,148 +114,18 @@ let ExeDomain = SSEPackedDouble in { v4f64>, VEX_W; } -let Predicates = [HasFMA] in { - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPSr213r 
VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : 
Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - -} // Predicates = [HasFMA] - let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType OpVT, PatFrag mem_frag, - SDPatternOperator OpNode = null_frag, bit MayLoad = 1> { + SDPatternOperator OpNode = null_frag> { + let isCommutable = 1 in def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; - let mayLoad = MayLoad in + let mayLoad = 1 in def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, @@ -266,6 +138,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, ComplexPattern mem_cpat, Intrinsic IntId, RegisterClass RC> { + let isCommutable = 1 in def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -294,7 +167,7 @@ let 
neverHasSideEffects = 1 in { } defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), - x86memop, RC, OpVT, mem_frag, OpNode, 0>, + x86memop, RC, OpVT, mem_frag, OpNode>, fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), memop, mem_cpat, Int, RC>; } @@ -324,73 +197,102 @@ defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, //===----------------------------------------------------------------------===// -multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { - def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), +multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, ValueType OpVT, SDNode OpNode, + PatFrag mem_frag> { + let isCommutable = 1 in + def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; - def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4; + def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4; - def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, memop:$src2, VR128:$src3), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)))]>, VEX_W, MemOp4; + def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, - (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; + [(set RC:$dst, + (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>; // For disassembler let isCodeGenOnly = 1 in - def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), + def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; } -multiclass fma4p<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256, +multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, + ComplexPattern mem_cpat, Intrinsic Int> { + let isCommutable = 1 in + def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, + mem_cpat:$src3))]>, VEX_W, MemOp4; + def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, memop:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set VR128:$dst, + (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +} + +multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT128, ValueType OpVT256, PatFrag ld_frag128, PatFrag 
ld_frag256> { + let isCommutable = 1 in def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; + (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, + VEX_W, MemOp4; def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, + [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2, (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; + let isCommutable = 1 in def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, - (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4; + (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, + VEX_W, MemOp4; def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, + [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4; def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, - (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; + (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; // For disassembler let isCodeGenOnly = 1 in { def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), @@ -406,45 +308,58 @@ let isCodeGenOnly = 1 in { let Predicates = [HasFMA4] in { -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps, - int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>; -defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd, - int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps, - int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd, - int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps, - int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>; -defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd, - int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", 
ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; -defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps, - int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>; -defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd, - int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>; -defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>; -defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd, - int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd, - int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>; +defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; +defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; +defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; +defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; +defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; +defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfnmadd_sd>; +defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; +defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + +defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64, + memopv2f64, memopv4f64>; +defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32, + memopv4f32, memopv8f32>; +defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64, + memopv2f64, memopv4f64>; } // HasFMA4 diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index ba3d4ef7be..247c42ce8a 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ 
b/lib/Target/X86/X86InstrFormats.td @@ -288,12 +288,14 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +def __xs : XS; + // SI - SSE 1 & 2 scalar instructions class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -304,7 +306,7 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); @@ -315,18 +317,25 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); } +// MMXPI - SSE 1 & 2 packed instructions with MMX operands +class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, + InstrItinClass itin, Domain d> + : I<o, F, outs, ins, asm, pattern, itin, d> { + let Predicates = !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]); +} + // PIi8 - SSE 1 & 2 packed instructions with immediate class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin, Domain d> : Ii8<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], - !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])); // AVX instructions have a 'v' prefix in the mnemonic let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); @@ -342,18 +351,18 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = 
IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, @@ -373,27 +382,31 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. // VSDI - SSE2 instructions with XD prefix in AVX form. // VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. +// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as +// MMX operands. +// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as +// MMX operands. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, @@ -406,6 +419,12 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; +class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; +class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: // @@ -416,21 +435,23 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs,
ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, - Requires<[HasSSE3]>; + Requires<[UseSSE3]>; // SSSE3 Instruction Templates: // // SS38I - SSSE3 instructions with T8 prefix. // SS3AI - SSSE3 instructions with TA prefix. +// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands. +// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands. // // Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version // uses the MMX registers. The 64-bit versions are grouped with the MMX @@ -439,10 +460,18 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSSE3]>; + Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, + Requires<[UseSSSE3]>; +class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, + Requires<[HasSSSE3]>; +class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -453,11 +482,11 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; // SSE4.2 Instruction Templates: // @@ -465,9 +494,10 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. +// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. 
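The HasSSEx/UseSSEx split these templates now depend on, restated in plain C++ (a sketch; the struct and helpers are mine): "Has" predicates only ask whether a feature is present, while the "Use" predicates defined in X86InstrInfo.td below additionally require AVX to be off, so legacy SSE encodings are selected only when the VEX-encoded AVX forms are unavailable.

struct Features {
  bool SSE42;
  bool AVX;
};

// HasSSE42: the feature exists; fine for CRC32, which has no VEX form
// (see the NOTE on SS42FI above).
static bool HasSSE42(const Features &F) { return F.SSE42; }

// UseSSE42: the feature exists AND AVX is off, so the non-VEX encoding
// is picked only when the AVX form cannot be.
static bool UseSSE42(const Features &F) { return F.SSE42 && !F.AVX; }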
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; @@ -476,7 +506,7 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, - Requires<[HasSSE42]>; + Requires<[UseSSE42]>; // AVX Instruction Templates: // Instructions introduced in AVX (no SSE equivalent forms) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ee2d3c43ec..32e4315fbd 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -183,8 +183,8 @@ def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; -def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>; -def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index bb14182e76..bad694a04e 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1115,6 +1115,36 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, // FIXME: add AVX 256-bit foldable instructions + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -1242,6 +1272,36 @@ 
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, + + // FMA4 foldable patterns + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 }, + { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, + { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, + { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, + { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, + { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, + { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 }, + { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, + { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, + { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_16 }, + { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_32 }, + { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 }, + { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 }, + { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1791,10 +1851,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. Get the known operands. - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isKill = MI->getOperand(1).isKill(); + const MachineOperand &Dest = MI->getOperand(0); + const MachineOperand &Src = MI->getOperand(1); MachineInstr *NewMI = NULL; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When @@ -1812,11 +1870,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHUFPDrri: { @@ -1826,15 +1882,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; - unsigned A = MI->getOperand(0).getReg(); unsigned M = MI->getOperand(3).getImm(); // Convert to PSHUFD mask. 
M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addReg(A, RegState::Define | getDeadRegState(isDead)) - .addReg(B, getKillRegState(isKill)).addImm(M); + .addOperand(Dest).addOperand(Src).addImm(M); break; } case X86::SHL64ri: { @@ -1845,15 +1899,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR64_NOSPRegClass)) return 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1864,15 +1917,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ShAmt == 0 || ShAmt >= 4) return 0; // LEA can't handle ESP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), + &X86::GR32_NOSPRegClass)) return 0; unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1885,10 +1938,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0).addReg(0); + .addOperand(Dest) + .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); break; } default: { @@ -1911,14 +1962,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), 1); break; } case X86::INC16r: @@ -1926,10 +1975,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, 1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), 1); break; case X86::DEC64r: case X86::DEC32r: @@ -1941,14 +1988,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, (const TargetRegisterClass*)&X86::GR64_NOSPRegClass : (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. - if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, RC)) + if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && + !MF.getRegInfo().constrainRegClass(Src.getReg(), RC)) return 0; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), -1); break; } case X86::DEC16r: @@ -1956,10 +2001,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, -1); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), -1); break; case X86::ADD64rr: case X86::ADD64rr_DB: @@ -1986,9 +2029,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return 0; NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); // Preserve undefness of the operands. bool isUndef = MI->getOperand(1).isUndef(); @@ -2008,9 +2050,15 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, Src2, isKill2); + .addOperand(Dest), + Src.getReg(), Src.isKill(), Src2, isKill2); + + // Preserve undefness of the operands. 
+ bool isUndef = MI->getOperand(1).isUndef(); + bool isUndef2 = MI->getOperand(2).isUndef(); + NewMI->getOperand(1).setIsUndef(isUndef); + NewMI->getOperand(3).setIsUndef(isUndef2); + if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); break; @@ -2020,10 +2068,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32_DB: case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; case X86::ADD32ri: case X86::ADD32ri8: @@ -2031,10 +2078,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } case X86::ADD16ri: @@ -2044,10 +2090,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addOperand(Dest).addOperand(Src), + MI->getOperand(2).getImm()); break; } } @@ -2056,10 +2101,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (!NewMI) return 0; if (LV) { // Update live variables - if (isKill) - LV->replaceKillInstruction(Src, MI, NewMI); - if (isDead) - LV->replaceKillInstruction(Dest, MI, NewMI); + if (Src.isKill()) + LV->replaceKillInstruction(Src.getReg(), MI, NewMI); + if (Dest.isDead()) + LV->replaceKillInstruction(Dest.getReg(), MI, NewMI); } MFI->insert(MBBI, NewMI); // Insert the new inst @@ -3450,6 +3495,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::FsFLD0SS: case X86::FsFLD0SD: return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); + case X86::AVX_SET0: + assert(HasAVX && "AVX not supported"); + return Expand2AddrUndef(MI, get(X86::VXORPSYrr)); + case X86::V_SETALLONES: + return Expand2AddrUndef(MI, get(HasAVX ? 
X86::VPCMPEQDrr : X86::PCMPEQDrr)); + case X86::AVX2_SETALLONES: + return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; @@ -3563,14 +3615,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 - if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); - else if (MI->getOpcode() == X86::MOV32r0) - NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV16r0) - NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV8r0) - NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); + unsigned Opc = 0; + switch (MI->getOpcode()) { + default: break; + case X86::MOV64r0: Opc = X86::MOV64mi32; break; + case X86::MOV32r0: Opc = X86::MOV32mi; break; + case X86::MOV16r0: Opc = X86::MOV16mi; break; + case X86::MOV8r0: Opc = X86::MOV8mi; break; + } + if (Opc) + NewMI = MakeM0Inst(*this, Opc, MOs, MI); if (NewMI) return NewMI; @@ -3799,15 +3853,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: Alignment = 32; break; case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SETALLONES: Alignment = 16; break; case X86::FsFLD0SD: @@ -3843,11 +3894,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (LoadMI->getOpcode()) { case X86::V_SET0: case X86::V_SETALLONES: - case X86::AVX_SET0PSY: - case X86::AVX_SET0PDY: - case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: - case X86::AVX2_SET0: + case X86::AVX_SET0: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. @@ -3879,15 +3927,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getFloatTy(MF.getFunction()->getContext()); else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); - else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) - Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); - else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0) + else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES || - Opc == X86::AVX2_SETALLONES); + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES); const Constant *C = IsAllOnes ? 
Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3962,6 +4007,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (OpNum == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 762284669e..ff09ceb87c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -114,7 +114,7 @@ def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, - [SDNPHasChain]>; + [SDNPHasChain,SDNPSideEffect]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>; def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, @@ -568,14 +568,21 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index c8f40bbb49..bd5485840d 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -118,11 +118,11 @@ let Constraints = "$src1 = $dst" in { /// Unary MMX instructions requiring SSSE3. 
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (memopmmx addr:$src))))], @@ -134,11 +134,11 @@ let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { let isCommutable = 0 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, @@ -149,11 +149,11 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, /// PALIGN MMX instructions (require SSSE3). multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { - def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; - def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, @@ -163,12 +163,10 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, Domain d> { - def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], - itins.rr, d>; - def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], - itins.rm, d>; + def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>; + def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -243,29 +241,30 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (x86mmx (bitconvert - (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))))], - IIC_MMX_MOVQ_RR>; - -def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector - (i64 (bitconvert (x86mmx VR64:$src))))))], - IIC_MMX_MOVQ_RR>; +def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, 
MRMSrcReg, (outs VR64:$dst), + (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (x86mmx (bitconvert + (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))))))], + IIC_MMX_MOVQ_RR>; + +def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 + (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))], + IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; -def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; +def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", + [], IIC_MMX_MOVQ_RR>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", @@ -577,6 +576,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), IIC_MMX_MASKMOV>; // 64-bit bit convert. +let Predicates = [HasSSE2] in { def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), @@ -585,5 +585,6 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; +} diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 220c06ddcf..5dcbf8084a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -251,35 +251,37 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. -def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))), +def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))), (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>; -def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))), +def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))), (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>; -def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))), +def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))), (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>; -def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))), +def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))), (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>; -def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))), +def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>; -def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))), +def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>; // A 128-bit subvector insert to the first 256-bit vector position // is a subregister copy that needs no instruction. 
-def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)), +let AddedComplexity = 25 in { // to give priority over vinsertf128rm +def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; -def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), +def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)), (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), @@ -362,7 +364,7 @@ let Predicates = [HasAVX] in { def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>; } -// Alias instructions that map fld0 to pxor for sse. +// Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1 in { @@ -382,11 +384,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, neverHasSideEffects = 1 in { -def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>; + isPseudo = 1 in { +def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>; } -def : Pat<(v4f32 immAllZerosV), (V_SET0)>; def : Pat<(v2f64 immAllZerosV), (V_SET0)>; def : Pat<(v4i32 immAllZerosV), (V_SET0)>; def : Pat<(v2i64 immAllZerosV), (V_SET0)>; @@ -394,35 +396,29 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0)>; def : Pat<(v16i8 immAllZerosV), (V_SET0)>; -// The same as done above but for AVX. The 256-bit ISA does not support PI, +// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI, // and doesn't need it because on Sandy Bridge the register is set to zero // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, which does not expand the instructions below like -// X86MCInstLower does.
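The FIXME removed above is exactly what this commit resolves: the zero and all-ones idioms become encoding-less pseudo-instructions, and X86InstrInfo::expandPostRAPseudo (see the X86InstrInfo.cpp hunk earlier in this diff) rewrites them after register allocation. A minimal sketch of that expansion step, reusing only the opcode names and the Expand2AddrUndef helper that appear in the hunks above; the wrapper function itself is illustrative:

  // Sketch: lowering of the zero/all-ones pseudos once register allocation
  // is done. Expand2AddrUndef rewrites the pseudo into the two-address
  // xor/pcmpeq idiom with undef source reads, so no live range is extended.
  static bool expandZeroIdiom(MachineBasicBlock::iterator MI,
                              const X86InstrInfo &TII, bool HasAVX) {
    switch (MI->getOpcode()) {
    case X86::V_SET0:          // 128-bit zero: xorps or vxorps
      return Expand2AddrUndef(MI, TII.get(HasAVX ? X86::VXORPSrr
                                                 : X86::XORPSrr));
    case X86::AVX_SET0:        // 256-bit zero: vxorps (requires AVX)
      return Expand2AddrUndef(MI, TII.get(X86::VXORPSYrr));
    case X86::V_SETALLONES:    // 128-bit all-ones: pcmpeqd or vpcmpeqd
      return Expand2AddrUndef(MI, TII.get(HasAVX ? X86::VPCMPEQDrr
                                                 : X86::PCMPEQDrr));
    case X86::AVX2_SETALLONES: // 256-bit all-ones: vpcmpeqd (requires AVX2)
      return Expand2AddrUndef(MI, TII.get(X86::VPCMPEQDYrr));
    }
    return false;
  }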
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -let Predicates = [HasAVX] in { -def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; -def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; -} -let Predicates = [HasAVX2], neverHasSideEffects = 1 in -def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "", - []>, VEX_4V; + isPseudo = 1, Predicates = [HasAVX] in { +def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>; } -let Predicates = [HasAVX2], AddedComplexity = 5 in { - def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>; - def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>; +let Predicates = [HasAVX] in + def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>; + +let Predicates = [HasAVX2] in { + def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>; + def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>; + def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>; + def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>; } -// AVX has no support for 256-bit integer instructions, but since the 128-bit +// AVX1 has no support for 256-bit integer instructions, but since the 128-bit // VPXOR instruction writes zero to its upper part, it's safe to build zeros. +let Predicates = [HasAVX1Only] in { def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v32i8 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; @@ -438,22 +434,17 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; +} // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementation, which does not expand the instructions below like -// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { - let Predicates = [HasAVX] in - def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; + isPseudo = 1 in { + def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in - def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", - [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; + def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>; } @@ -605,27 +596,27 @@ let Predicates = [HasAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), + (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), + (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, - (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), + (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, - (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))), + (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. @@ -704,7 +695,7 @@ let Predicates = [HasAVX] in { (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSS to the lower bits. @@ -738,7 +729,7 @@ let Predicates = [HasSSE1] in { (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSD to the lower bits. 
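Most of the predicate churn below follows one rule: HasSSEn is also true on every AVX subtarget (AVX implies the SSE feature levels), so legacy-encoded patterns guarded by it could still compete with their VEX-encoded V* twins; the new UseSSEn predicates restrict those patterns to subtargets without AVX. A toy restatement of the logic, assuming only what the predicate strings in the X86InstrInfo.td hunk spell out (hasSSE2() and hasNoAVX()); the struct is illustrative, not the real X86Subtarget:

  // Illustrative model of the subtarget queries behind HasSSE2/UseSSE2.
  struct Subtarget {
    bool SSE2;                                   // set directly by -msse2
    bool AVX;                                    // AVX implies the SSE levels
    bool hasSSE2() const { return SSE2 || AVX; }
    bool hasNoAVX() const { return !AVX; }
  };

  // HasSSE2 still matches when AVX is available...
  inline bool HasSSE2(const Subtarget &ST) { return ST.hasSSE2(); }
  // ...while UseSSE2 matches only when the VEX-encoded form is not an option,
  // so the MOVSD/CVTSS2SD/... patterns stop shadowing VMOVSD/VCVTSS2SD.
  inline bool UseSSE2(const Subtarget &ST) {
    return ST.hasSSE2() && ST.hasNoAVX();
  }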
@@ -916,16 +907,16 @@ let isCodeGenOnly = 1 in { let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86vzmovl - (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl - (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl - (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))), + (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl - (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))), + (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; } @@ -975,10 +966,10 @@ let Predicates = [HasAVX] in { (VMOVUPDmr addr:$dst, VR128:$src)>; } -let Predicates = [HasSSE1] in +let Predicates = [UseSSE1] in def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), (MOVUPSmr addr:$dst, VR128:$src)>; -let Predicates = [HasSSE2] in +let Predicates = [UseSSE2] in def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), (MOVUPDmr addr:$dst, VR128:$src)>; @@ -1028,12 +1019,52 @@ let Predicates = [HasAVX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v32i8 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>; + + // Special patterns for storing subvector extracts of the lower 128 bits. + // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr + def : Pat<(alignedstore (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(alignedstore (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + + def : Pat<(store (v2f64 (extract_subvector + (v4f64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4f32 (extract_subvector + (v8f32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v2i64 (extract_subvector + (v4i64 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v4i32 (extract_subvector + (v8i32 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v8i16 (extract_subvector + (v16i16 VR256:$src), (iPTR 0))), addr:$dst), + (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + def : Pat<(store (v16i8 (extract_subvector + (v32i8 VR256:$src), (iPTR 0))), addr:$dst), +
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; } // Use movaps / movups for SSE integer load / store (one byte shorter). // The instructions selected below are then converted to MOVDQA/MOVDQU // during the SSE domain pass. -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), @@ -1180,7 +1211,7 @@ let Predicates = [HasAVX] in { (VMOVLPDmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; @@ -1205,7 +1236,7 @@ let Predicates = [HasSSE1] in { (MOVLPSmr addr:$src1, VR128:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; @@ -1279,7 +1310,7 @@ let Predicates = [HasAVX] in { (VMOVHPDrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), (MOVHPSrm VR128:$src1, addr:$src2)>; @@ -1289,7 +1320,7 @@ let Predicates = [HasSSE1] in { (MOVHPSrm VR128:$src1, addr:$src2)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold // because it has two uses through a bitcast. One use disappears at isel time // and the fold opportunity @@ -1346,7 +1377,7 @@ let Predicates = [HasAVX] in { (VMOVHLPSrr VR128:$src1, VR128:$src2)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { // MOVLHPS patterns def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; @@ -1456,7 +1487,7 @@ def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1633,7 +1664,7 @@ defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - TB, Requires<[HasSSE2]>; + TB, Requires<[UseSSE2]>; /// SSE 2 Only @@ -1663,7 +1694,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1684,13 +1715,13 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD,
Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; } // Convert scalar single to scalar double @@ -1709,30 +1740,28 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; } -let AddedComplexity = 1 in { // give AVX priority - def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; - def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; +def : Pat<(f64 (fextend FR32:$src)), + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; +def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[HasAVX, OptForSize]>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; -} // AddedComplexity = 1 +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1740,9 +1769,9 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), // Since these loads aren't folded into the fextend, we have to match it // explicitly here. 
def : Pat<(fextend (loadf32 addr:$src)), - (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>; + (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1762,13 +1791,13 @@ def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; } // Convert packed single/double fp to doubleword @@ -1904,7 +1933,7 @@ let Predicates = [HasAVX] in { (VCVTTPS2DQYrm addr:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -1994,7 +2023,7 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), IIC_SSE_CVT_PD_RM>, TB, VEX; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], @@ -2109,7 +2138,7 @@ let Predicates = [HasAVX] in { (VCVTPS2PDYrm addr:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Match fextend for 128 conversions def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), (CVTPS2PDrr VR128:$src)>; @@ -2336,14 +2365,14 @@ def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), @@ -2420,7 +2449,7 @@ let Predicates = [HasAVX] in { (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; } -let Predicates = [HasSSE1] in { +let Predicates = [UseSSE1] in { def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; @@ -2428,7 +2457,7 @@ let Predicates = [HasSSE1] in { (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // Generic SHUFPD patterns def : Pat<(v2i64 (X86Shufp VR128:$src1, (memopv2i64 addr:$src2), (i8 imm:$imm))), @@ -2500,7 +2529,27 @@ let Constraints = "$src1 = $dst" in { SSEPackedDouble>, TB, OpSize; } // Constraints = "$src1 = $dst" -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX1Only] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, 
(bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + +let Predicates = [HasAVX] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold because it has two uses through a bitcast. One use disappears at isel @@ -2509,7 +2558,7 @@ let Predicates = [HasAVX], AddedComplexity = 1 in { (VUNPCKLPDrr VR128:$src, VR128:$src)>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold because it has two uses through a bitcast. One use disappears at isel @@ -2578,16 +2627,16 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", def : Pat<(i32 (X86fgetsign FR32:$src)), (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, - Requires<[HasSSE1]>; + Requires<[UseSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, - Requires<[HasSSE2]>; + Requires<[UseSSE2]>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// @@ -2683,14 +2732,12 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, } // Alias bitwise logical operations using SSE logical ops on packed FP values.
-let mayLoad = 0 in { - defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, - SSE_BIT_ITINS_P>; - defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, - SSE_BIT_ITINS_P>; - defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, - SSE_BIT_ITINS_P>; -} +defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, + SSE_BIT_ITINS_P>; +defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, + SSE_BIT_ITINS_P>; +defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, + SSE_BIT_ITINS_P>; let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, @@ -2794,27 +2841,23 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins, bit Is2Addr = 1> { - let mayLoad = 0 in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>, TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>, TB, OpSize; - } } multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { - let mayLoad = 0 in { - defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, + defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>, TB; - defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, + defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>, TB, OpSize; - } } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, @@ -2924,7 +2967,7 @@ let Constraints = "$src1 = $dst" in { } } -let isCommutable = 1, isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 in { defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, @@ -2978,7 +3021,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[HasSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; @@ -2992,7 +3035,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; @@ -3000,6 +3043,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse1_fp_unop_p - SSE1 unops in packed form. 
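For readers outside the backend: the FsAND/FsOR/FsXOR aliases at the top of this hunk perform bitwise logic on scalar floats by reusing the packed ANDPS/ORPS/XORPS forms; only the bits of the low lane matter. A self-contained illustration of what the X86fand node computes (std::bit_cast needs C++20; nothing below comes from the patch except those names):

  #include <bit>
  #include <cstdint>
  #include <cstdio>

  // Bitwise AND on the raw bits of a float, as ANDPS does on the low lane.
  static float fand(float a, float b) {
    return std::bit_cast<float>(std::bit_cast<std::uint32_t>(a) &
                                std::bit_cast<std::uint32_t>(b));
  }

  int main() {
    // Clearing the sign bit with an AND mask is the classic fabs lowering.
    const float AbsMask = std::bit_cast<float>(0x7fffffffu);
    std::printf("%f\n", fand(-2.5f, AbsMask)); // prints 2.500000
  }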
@@ -3062,7 +3106,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[HasSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; @@ -3072,20 +3116,20 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, } /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +let hasSideEffects = 0 in multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - let neverHasSideEffects = 1 in { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in + let mayLoad = 1 in { def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - } def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3176,7 +3220,6 @@ let Predicates = [HasAVX] in { SSE_RCPP>, VEX; } -let AddedComplexity = 1 in { def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; def : Pat<(f32 (fsqrt (load addr:$src))), @@ -3199,9 +3242,8 @@ def : Pat<(f32 (X86frcp FR32:$src)), def : Pat<(f32 (X86frcp (load addr:$src))), (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX, OptForSize]>; -} -let Predicates = [HasAVX], AddedComplexity = 1 in { +let Predicates = [HasAVX] in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS VR128:$src, FR32)), @@ -3322,7 +3364,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), IIC_SSE_MOVNT>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), @@ -3482,7 +3524,7 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; // For Disassembler let isCodeGenOnly = 1 in { @@ -3492,7 +3534,7 @@ def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } let canFoldAsLoad = 1, mayLoad = 1 in { @@ -3504,7 +3546,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/], IIC_SSE_MOVU_P_RM>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let mayStore = 1 in { @@ -3516,7 +3558,7 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 
VR128:$src), addr:$dst)*/], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // Intrinsic forms of MOVDQU load and store @@ -3530,7 +3572,7 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)], IIC_SSE_MOVU_P_MR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -4028,7 +4070,7 @@ let Predicates = [HasAVX2] in { (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), @@ -4210,7 +4252,7 @@ let Predicates = [HasAVX2] in { defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX; } -let Predicates = [HasSSE2] in { +let Predicates = [UseSSE2] in { let AddedComplexity = 5 in defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; @@ -4325,28 +4367,6 @@ let Constraints = "$src1 = $dst" in { } } // ExeDomain = SSEPackedInt -// Patterns for using AVX1 instructions with integer vectors -// Here to give AVX2 priority -let Predicates = [HasAVX] in { - def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -4395,7 +4415,7 @@ let Predicates = [HasAVX] in { } let Constraints = "$src1 = $dst" in - defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; + defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>; } // ExeDomain = SSEPackedInt @@ -4556,7 +4576,7 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, @@ -4672,14 +4692,14 @@ let Predicates = [HasAVX] in { } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))), + (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))), + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -4719,7 +4739,7 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), [(set VR128:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, - Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix + Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int @@ -4762,7 +4782,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4773,7 +4793,7 @@ let Predicates = [HasAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; } -let Predicates = [HasSSE2], AddedComplexity = 20 in { +let Predicates = [UseSSE2], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), @@ -4803,7 +4823,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4818,7 +4838,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, - XS, Requires<[HasSSE2]>; + XS, Requires<[UseSSE2]>; } let AddedComplexity = 20 in { @@ -4828,7 +4848,7 @@ let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } - let Predicates = [HasSSE2] in { + let Predicates = [UseSSE2] in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), @@ -4908,7 +4928,7 @@ let Predicates = [HasAVX] in { (VMOVSLDUPYrm addr:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), (MOVSHDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), @@ -4977,7 +4997,7 @@ let Predicates = [HasAVX] in { (VMOVDDUPYrr VR256:$src)>; } -let Predicates = [HasSSE3] in { +let Predicates = [UseSSE3] in { def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), @@ -5041,7 +5061,7 @@ let Predicates = [HasAVX] in { f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; } } -let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { +let Constraints 
= "$src1 = $dst", Predicates = [UseSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem, SSE_ALU_F32P>, TB, XD; @@ -5424,7 +5444,7 @@ let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V; -let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in +let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGN : ssse3_palign<"palignr">; let Predicates = [HasAVX2] in { @@ -5449,7 +5469,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } -let Predicates = [HasSSSE3] in { +let Predicates = [UseSSSE3] in { def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), @@ -5583,7 +5603,7 @@ let Predicates = [HasAVX] in { (VPMOVZXDQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load. def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), (PMOVSXBWrm addr:$src)>; @@ -5633,7 +5653,7 @@ let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; } @@ -5704,7 +5724,7 @@ let Predicates = [HasAVX] in { (VPMOVZXWQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), (PMOVSXBDrm addr:$src)>; @@ -5772,7 +5792,7 @@ let Predicates = [HasAVX] in { (VPMOVZXBQrm addr:$src)>; } -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5918,7 +5938,7 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + Requires<[UseSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -6356,7 +6376,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, Intrinsic IntId256> { let isCommutable = 1 in @@ -6705,7 +6725,7 @@ def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; -let Predicates = [HasSSE41] in { +let Predicates = [UseSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), (v16i8 VR128:$src2))), (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; @@ -6802,9 +6822,8 @@ multiclass pseudo_pcmpistrm<string asm> { } let Defs = [EFLAGS], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; - defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; + defm VPCMPISTRM128 : 
pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; + defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { @@ -6840,9 +6859,8 @@ multiclass pseudo_pcmpestrm<string asm> { } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { - let AddedComplexity = 1 in - defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; - defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; + defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; } let Predicates = [HasAVX], @@ -7237,40 +7255,59 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), let Predicates = [HasAVX] in { def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), - (i32 imm)), +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), (VINSERTF128rm VR256:$src1, addr:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; } @@ -7290,56 +7327,61 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), []>, VEX; } -// Extract and 
store. -let Predicates = [HasAVX] in { - def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - - def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; - def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))), - (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; -} - // AVX1 patterns let Predicates = [HasAVX] in { -def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), - (VEXTRACTF128rr VR256:$src1, imm:$src2)>; - -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4f32 (VEXTRACTF128rr (v8f32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2f64 (VEXTRACTF128rr (v4f64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + +def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +} + +let Predicates = [HasAVX1Only] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr 
addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } //===----------------------------------------------------------------------===// @@ -7456,29 +7498,29 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), } let Predicates = [HasAVX] in { +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +} + +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; @@ -7665,19 +7707,22 @@ let Predicates = [HasAVX2] in { } // AVX1 broadcast patterns -let Predicates = [HasAVX] in { +let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; +} + +let Predicates = [HasAVX] in { def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), (VBROADCASTSDYrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. 
@@ -7757,7 +7802,6 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // -let AddedComplexity = 1 in { def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7768,9 +7812,8 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$src3)))]>, VEX_4V; -} -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), @@ -7805,23 +7848,43 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), []>, VEX_4V; } -let Predicates = [HasAVX2], AddedComplexity = 1 in { +let Predicates = [HasAVX2] in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), + (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), + (bc_v4i32 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), + (bc_v16i8 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), + (bc_v8i16 (memopv2i64 addr:$src2)), + (iPTR imm)), + (VINSERTI128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; } //===----------------------------------------------------------------------===// @@ -7838,23 +7901,40 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -let Predicates = [HasAVX2], AddedComplexity = 1 in { -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v2i64 (VEXTRACTI128rr (v4i64 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v4i32 (VEXTRACTI128rr (v8i32 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : 
Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v8i16 (VEXTRACTI128rr (v16i16 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), +def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), (v16i8 (VEXTRACTI128rr (v32i8 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + +def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; +def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), + (iPTR imm))), addr:$dst), + (VEXTRACTI128mr addr:$dst, VR256:$src1, + (EXTRACT_get_vextractf128_imm VR128:$ext))>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 02abe85534..4b528f6153 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -555,7 +555,7 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } -template<typename T> void addUnaligned(void *Pos, T Delta) { +template<typename T> static void addUnaligned(void *Pos, T Delta) { T Value; std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), sizeof(T)); diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index fd9e18aeb7..811a4b5618 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -377,12 +377,6 @@ ReSimplify: case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; - case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; - case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; - case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; - case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break; - case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 67ed6d105d..610e7ed7f3 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,6 +350,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasVectorUAMem(false) , HasCmpxchg16b(false) , UseLeaForSP(false) + , HasSlowDivide(false) , PostRAScheduler(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a91c12506c..4c7b8fc4de 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -136,6 +136,10 @@ protected: /// the stack pointer. 
This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// HasSlowDivide - True if smaller divides are significantly faster than + /// full divides and should be used when possible. + bool HasSlowDivide; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -201,6 +205,7 @@ public: bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } + bool hasNoAVX() const { return X86SSELevel < AVX; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } @@ -208,7 +213,8 @@ public: bool hasAES() const { return HasAES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasFMA() const { return HasFMA; } - bool hasFMA4() const { return HasFMA4; } + // FIXME: Favor FMA when both are enabled. Is this the right thing to do? + bool hasFMA4() const { return HasFMA4 && !HasFMA; } bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } @@ -222,6 +228,7 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasSlowDivide() const { return HasSlowDivide; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 80b75dc5f9..449eed3d8d 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -42,7 +42,6 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. - MachineBasicBlock *MBB; // Current basic block // Any YMM register live-in to this function? bool FnHasLiveInYmm; @@ -84,7 +83,7 @@ namespace { // 2) All states must be clean for the result to be clean // 3) If none above and one unknown, the result state is also unknown // - unsigned computeState(unsigned PrevState, unsigned CurState) { + static unsigned computeState(unsigned PrevState, unsigned CurState) { if (PrevState == ST_INIT) return CurState; @@ -122,7 +121,7 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { } static bool hasYmmReg(MachineInstr *MI) { - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -189,7 +188,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; unsigned BBNum = BB.getNumber(); - MBB = &BB; // Don't process already solved BBs if (BBSolved[BBNum]) @@ -207,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // The entry MBB for the function may set the initial state to dirty if // the function receives any YMM incoming arguments - if (MBB == MF.begin()) { + if (&BB == MF.begin()) { EntryState = ST_CLEAN; if (FnHasLiveInYmm) EntryState = ST_DIRTY; @@ -253,7 +251,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, // When unknown, only compute the information within the block to have // it available in the exit if possible, but don't change the block. 
if (EntryState != ST_UNKNOWN) { - BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + BuildMI(BB, I, dl, TII->get(X86::VZEROUPPER)); ++NumVZU; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index ae646a2485..3e7666bdb9 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -33,7 +33,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPMayLoad]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -58,7 +58,7 @@ def cprelwrapper : SDNode<"XCoreISD::CPRelativeWrapper", SDT_XCoreAddress, def SDT_XCoreStwsp : SDTypeProfile<0, 2, [SDTCisInt<1>]>; def XCoreStwsp : SDNode<"XCoreISD::STWSP", SDT_XCoreStwsp, - [SDNPHasChain]>; + [SDNPHasChain, SDNPMayStore]>; // These are target-independent nodes, but have target-specific formats. def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index cdd0a0893b..be5855abcd 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -176,7 +176,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, #ifndef NDEBUG DEBUG(errs() << "\nFunction : " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); DEBUG(errs() << "<--------->\n"); DEBUG(MI.print(errs())); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 6d950d2024..b888e95982 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -346,7 +346,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) { /// Given a value that is stored to a global but never read, determine whether /// it's safe to remove the store and the chain of computation that feeds the /// store. -static bool IsSafeComputationToRemove(Value *V) { +static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { do { if (isa<Constant>(V)) return true; @@ -355,7 +355,7 @@ static bool IsSafeComputationToRemove(Value *V) { if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) return false; - if (isAllocationFn(V)) + if (isAllocationFn(V, TLI)) return true; Instruction *I = cast<Instruction>(V); @@ -376,7 +376,8 @@ static bool IsSafeComputationToRemove(Value *V) { /// of the global and clean up any that obviously don't assign the global a /// value that isn't dynamically allocated. /// -static bool CleanupPointerRootUsers(GlobalVariable *GV) { +static bool CleanupPointerRootUsers(GlobalVariable *GV, + const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where // pointers are forgotten, causing an accumulating growth in memory // usage over time. The common strategy for leak checkers is to whitelist the @@ -432,18 +433,18 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. 
Dead.clear(); - CleanupPointerRootUsers(GV); + CleanupPointerRootUsers(GV, TLI); return true; } } } for (int i = 0, e = Dead.size(); i != e; ++i) { - if (IsSafeComputationToRemove(Dead[i].first)) { + if (IsSafeComputationToRemove(Dead[i].first, TLI)) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { - if (isAllocationFn(I)) + if (isAllocationFn(I, TLI)) break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) @@ -975,7 +976,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // nor is the global. if (AllNonStoreUsesGone) { if (isLeakCheckerRoot(GV)) { - Changed |= CleanupPointerRootUsers(GV); + Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; CleanupConstantGlobalUsers(GV, 0, TD, TLI); @@ -1465,9 +1466,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, - Value *NElems, TargetData *TD) { + Value *NElems, TargetData *TD, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); - Type *MAT = getMallocAllocatedType(CI); + Type *MAT = getMallocAllocatedType(CI, TLI); StructType *STy = cast<StructType>(MAT); // There is guaranteed to be at least one use of the malloc (storing @@ -1688,7 +1690,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // This eliminates dynamic allocation, avoids an indirection accessing the // data, and exposes the resultant global to further GlobalOpt. // We cannot optimize the malloc if we cannot determine malloc array size. - Value *NElems = getMallocArraySize(CI, TD, true); + Value *NElems = getMallocArraySize(CI, TD, TLI, true); if (!NElems) return false; @@ -1725,7 +1727,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 - if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { + if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); @@ -1742,7 +1744,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CI = cast<CallInst>(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); + GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true), + TD, TLI); return true; } @@ -1771,8 +1774,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // Optimize away any trapping uses of the loaded value. if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI)) return true; - } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { - Type *MallocType = getMallocAllocatedType(CI); + } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { + Type *MallocType = getMallocAllocatedType(CI, TLI); if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, TD, TLI)) @@ -1964,7 +1967,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, bool Changed; if (isLeakCheckerRoot(GV)) { // Delete any constant stores to the global. 
- Changed = CleanupPointerRootUsers(GV); + Changed = CleanupPointerRootUsers(GV, TLI); } else { // Delete any stores we can find to the global. We may not be able to // make it completely dead though. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 712888aee9..69a22fb1e6 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -339,6 +340,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); SmallPtrSet<Function*, 8> SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -417,7 +419,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction())) { + if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cbe1ca4ddc..b12fc01357 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -168,7 +168,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { /// the heavy lifting. /// Instruction *InstCombiner::visitCallInst(CallInst &CI) { - if (isFreeCall(&CI)) + if (isFreeCall(&CI, TLI)) return visitFree(CI); // If the caller function is nounwind, mark the call as nounwind, even if the @@ -243,7 +243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: break; case Intrinsic::objectsize: { uint64_t Size; - if (getObjectSize(II->getArgOperand(0), Size, TD)) + if (getObjectSize(II->getArgOperand(0), Size, TD, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } @@ -877,7 +877,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. 
// Instruction *InstCombiner::visitCallSite(CallSite CS) { - if (isAllocLikeFn(CS.getInstruction())) + if (isAllocLikeFn(CS.getInstruction(), TLI)) return visitAllocSite(*CS.getInstruction()); bool Changed = false; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 35a0bbb761..2a7182fc1d 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -462,6 +462,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { } } + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) + if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) { + Value *X; + ConstantInt *C1; + if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) { + APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1)); + return BinaryOperator::CreateUDiv(X, Builder->getInt(NC)); + } + } + // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 68ecd51604..ff758c40af 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1068,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0), TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1107,7 +1107,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 4> Worklist; Worklist.push_back(AI); @@ -1163,7 +1164,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { } } - if (isFreeCall(I)) { + if (isFreeCall(I, TLI)) { Users.push_back(I); continue; } @@ -1188,7 +1189,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. SmallVector<WeakVH, 64> Users; - if (isAllocSiteRemovable(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users, TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1872,7 +1873,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Instruction *Inst = BBI++; // DCE instruction if trivially dead. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); @@ -2002,7 +2003,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. 
- if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { DEBUG(errs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; @@ -2102,7 +2103,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the instruction was modified, it's possible that it is now dead. // if so, remove it. - if (isInstructionTriviallyDead(I)) { + if (isInstructionTriviallyDead(I, TLI)) { EraseInstFromFunction(*I); } else { Worklist.Add(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 06f4d2fedd..0775cf4a22 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "asan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" @@ -217,7 +217,7 @@ struct AddressSanitizer : public ModulePass { Function *AsanCtorFunction; Function *AsanInitFunction; Instruction *CtorInsertBefore; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; @@ -544,6 +544,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G); + if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; // Touch only those globals that will not be defined in other modules. @@ -643,6 +644,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); + // Don't check initialization order if this global is blacklisted. + GlobalHasDynamicInitializer &= !BL->isInInit(*G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -736,7 +739,7 @@ bool AddressSanitizer::runOnModule(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); @@ -774,7 +777,7 @@ bool AddressSanitizer::runOnModule(Module &M) { /*hasSideEffects=*/true); llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid : (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64); diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp new file mode 100644 index 0000000000..2cb119964a --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -0,0 +1,102 @@ +//===-- BlackList.cpp - blacklist for sanitizers --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +//===----------------------------------------------------------------------===// + +#include <utility> +#include <string> + +#include "BlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +BlackList::BlackList(const StringRef Path) { + // Validate and open blacklist file. + if (!Path.size()) return; + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + report_fatal_error("Can't open blacklist file: " + Path + ": " + + EC.message()); + } + + // Iterate through each line in the blacklist file. + SmallVector<StringRef, 16> Lines; + SplitString(File.take()->getBuffer(), Lines, "\n\r"); + StringMap<std::string> Regexps; + for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end(); + I != E; ++I) { + // Get our prefix and unparsed regexp. + std::pair<StringRef, StringRef> SplitLine = I->split(":"); + StringRef Prefix = SplitLine.first; + std::string Regexp = SplitLine.second; + + // Replace * with .* + for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; + pos += strlen(".*")) { + Regexp.replace(pos, strlen("*"), ".*"); + } + + // Check that the regexp is valid. + Regex CheckRE(Regexp); + std::string Error; + if (!CheckRE.isValid(Error)) { + report_fatal_error("malformed blacklist regex: " + SplitLine.second + + ": " + Error); + } + + // Add this regexp into the proper group by its prefix. + if (Regexps[Prefix].size()) + Regexps[Prefix] += "|"; + Regexps[Prefix] += Regexp; + } + + // Iterate through each of the prefixes, and create Regexs for them. + for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end(); + I != E; ++I) { + Entries[I->getKey()] = new Regex(I->getValue()); + } +} + +bool BlackList::isIn(const Function &F) { + return isIn(*F.getParent()) || inSection("fun", F.getName()); +} + +bool BlackList::isIn(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global", G.getName()); +} + +bool BlackList::isIn(const Module &M) { + return inSection("src", M.getModuleIdentifier()); +} + +bool BlackList::isInInit(const GlobalVariable &G) { + return isIn(*G.getParent()) || inSection("global-init", G.getName()); +} + +bool BlackList::inSection(const StringRef Section, + const StringRef Query) { + Regex *FunctionRegex = Entries[Section]; + return FunctionRegex ? FunctionRegex->match(Query) : false; +} + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h new file mode 100644 index 0000000000..73977fc10a --- /dev/null +++ b/lib/Transforms/Instrumentation/BlackList.h @@ -0,0 +1,55 @@ +//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions or global +// variables based on a user-supplied blacklist. +// +// The blacklist disables instrumentation of various functions and global +// variables. Each line contains a prefix, followed by a wild card expression. +// --- +// fun:*_ZN4base6subtle* +// global:*global_with_bad_access_or_initialization* +// global-init:*global_with_initialization_issues* +// src:file_with_tricky_code.cc +// --- +// Note that the wild card is in fact an llvm::Regex, but * is automatically +// replaced with .* +// This is similar to the "ignore" feature of ThreadSanitizer. +// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +// +//===----------------------------------------------------------------------===// +// + +#include "llvm/ADT/StringMap.h" + +namespace llvm { +class Function; +class GlobalVariable; +class Module; +class Regex; +class StringRef; + +class BlackList { + public: + BlackList(const StringRef Path); + // Returns whether either this function or its source file is blacklisted. + bool isIn(const Function &F); + // Returns whether either this global or its source file is blacklisted. + bool isIn(const GlobalVariable &G); + // Returns whether this module is blacklisted by filename. + bool isIn(const Module &M); + // Returns whether a global should be excluded from initialization checking. + bool isInInit(const GlobalVariable &G); + private: + StringMap<Regex*> Entries; + + bool inSection(const StringRef Section, const StringRef Query); +}; + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 09e0f14451..642908120b 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/TargetFolder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Instrumentation.h" using namespace llvm; @@ -48,10 +49,12 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetData>(); + AU.addRequired<TargetLibraryInfo>(); } private: const TargetData *TD; + const TargetLibraryInfo *TLI; ObjectSizeOffsetEvaluator *ObjSizeEval; BuilderTy *Builder; Instruction *Inst; @@ -166,11 +169,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { bool BoundsChecking::runOnFunction(Function &F) { TD = &getAnalysis<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); TrapBB = 0; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; - ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext()); ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 00de882f17..058f68c7ce 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,8 +1,8 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BlackList.cpp BoundsChecking.cpp EdgeProfiling.cpp - FunctionBlackList.cpp GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp diff --git
a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp deleted file mode 100644 index 188ea4d9b3..0000000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. -// -//===----------------------------------------------------------------------===// - -#include "FunctionBlackList.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Function.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" - -namespace llvm { - -FunctionBlackList::FunctionBlackList(const std::string &Path) { - Functions = NULL; - const char *kFunPrefix = "fun:"; - if (!Path.size()) return; - std::string Fun; - - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { - report_fatal_error("Can't open blacklist file " + Path + ": " + - EC.message()); - } - MemoryBuffer *Buff = File.take(); - const char *Data = Buff->getBufferStart(); - size_t DataLen = Buff->getBufferSize(); - SmallVector<StringRef, 16> Lines; - SplitString(StringRef(Data, DataLen), Lines, "\n\r"); - for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { - if (Lines[i].startswith(kFunPrefix)) { - std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - std::string ThisFuncRE; - // add ThisFunc replacing * with .* - for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { - if (ThisFunc[j] == '*') - ThisFuncRE += '.'; - ThisFuncRE += ThisFunc[j]; - } - // Check that the regexp is valid. - Regex CheckRE(ThisFuncRE); - std::string Error; - if (!CheckRE.isValid(Error)) - report_fatal_error("malformed blacklist regex: " + ThisFunc + - ": " + Error); - // Append to the final regexp. - if (Fun.size()) - Fun += "|"; - Fun += ThisFuncRE; - } - } - if (Fun.size()) { - Functions = new Regex(Fun); - } -} - -bool FunctionBlackList::isIn(const Function &F) { - if (Functions) { - bool Res = Functions->match(F.getName()); - return Res; - } - return false; -} - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h deleted file mode 100644 index c1239b9b7e..0000000000 --- a/lib/Transforms/Instrumentation/FunctionBlackList.h +++ /dev/null @@ -1,37 +0,0 @@ -//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions based on -// user-supplied blacklist. 
-// -//===----------------------------------------------------------------------===// -// - -#include <string> - -namespace llvm { -class Function; -class Regex; - -// Blacklisted functions are not instrumented. -// The blacklist file contains one or more lines like this: -// --- -// fun:FunctionWildCard -// --- -// This is similar to the "ignore" feature of ThreadSanitizer. -// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -class FunctionBlackList { - public: - FunctionBlackList(const std::string &Path); - bool isIn(const Function &F); - private: - Regex *Functions; -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 264a6a6153..9fcde316c0 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -88,11 +88,10 @@ namespace { // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *, - MDNode *>, 8> &); + void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - std::string mangleName(DICompileUnit CU, std::string NewStem); + std::string mangleName(DICompileUnit CU, const char *NewStem); bool EmitNotes; bool EmitData; @@ -329,7 +328,7 @@ namespace { }; } -std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) { +std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) { for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) { MDNode *N = GCov->getOperand(i); @@ -630,7 +629,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { } void GCOVProfiler::insertCounterWriteout( - SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) { + ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = Function::Create(WriteoutFTy, @@ -652,7 +651,7 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(compile_unit, "gcda"); Builder.CreateCall(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda)); - for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator + for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator I = CountersBySP.begin(), E = CountersBySP.end(); I != E; ++I) { DISubprogram SP(I->second); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index dc0fa7175d..17b7775434 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,7 +21,7 @@ #define DEBUG_TYPE "tsan" -#include "FunctionBlackList.h" +#include "BlackList.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" @@ -50,7 +50,7 @@ static cl::opt<std::string> ClBlackListFile("tsan-blacklist", STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); -STATISTIC(NumOmittedReadsBeforeWrite, +STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); @@ -77,7 +77,7 @@ struct ThreadSanitizer : public FunctionPass { int 
getMemoryAccessFuncIndex(Value *Addr); TargetData *TD; - OwningPtr<FunctionBlackList> BL; + OwningPtr<BlackList> BL; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. Function *TsanFuncEntry; @@ -121,7 +121,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new FunctionBlackList(ClBlackListFile)); + BL.reset(new BlackList(ClBlackListFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -186,7 +186,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { NumOmittedReadsFromConstantGlobals++; return true; } - } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. NumOmittedReadsFromVtable++; @@ -344,7 +344,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { case NotAtomic: assert(false); case Unordered: // Fall-through. case Monotonic: v = 1 << 0; break; - // case Consume: v = 1 << 1; break; // Not specified yet. + // case Consume: v = 1 << 1; break; // Not specified yet. case Acquire: v = 1 << 2; break; case Release: v = 1 << 3; break; case AcquireRelease: v = 1 << 4; break; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index a8deda8b74..59121078cb 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -43,6 +43,7 @@ #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PFI = getAnalysisIfAvailable<ProfileInfo>(); OptSize = F.hasFnAttr(Attribute::OptimizeForSize); - // First pass, eliminate blocks that contain only PHI nodes and an + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (TLI && TLI->isSlowDivBypassed()) { + const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes(); + + for (Function::iterator I = F.begin(); I != F.end(); I++) { + EverMadeChange |= bypassSlowDivision(F, + I, + BypassTypeMap); + } + } + + // Eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - RecursivelyDeleteTriviallyDeadInstructions(Repl); + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); if (IterHandle != CurInstIterator) { // If the iterator instruction was recursively deleted, start over at the @@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { } +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { - // If we have a SelectInst that will likely profit from branch prediction, - // turn it into a branch. 
- if (DisableSelectToBranch || OptSize || !TLI || - !TLI->isPredictableSelectExpensive()) - return false; + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); - if (!SI->getCondition()->getType()->isIntegerTy(1) || - !isFormingBranchFromSelectProfitable(SI)) + // Can we convert the 'select' to CF ? + if (DisableSelectToBranch || OptSize || !TLI || VectorCond) return false; + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(SI)) + return false; + } + ModifiedDT = true; // First, we split the block containing the select into 2 blocks. diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8dbcc23d7e..086f0a1a71 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -22,6 +22,7 @@ #include "llvm/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -38,10 +39,11 @@ namespace { initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry()); } virtual bool runOnBasicBlock(BasicBlock &BB) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { Instruction *Inst = DI++; - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; ++DIEEliminated; @@ -87,6 +89,8 @@ char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) bool DCE::runOnFunction(Function &F) { + TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); + // Start out with all of the instructions in the worklist... std::vector<Instruction*> WorkList; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) @@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) { Instruction *I = WorkList.back(); WorkList.pop_back(); - if (isInstructionTriviallyDead(I)) { // If the instruction is dead. + if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. // Loop over all of the values that the instruction uses, if there are // instructions being used, add them to the worklist, because they might // go dead after this one is removed. 
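The HasSlowDivide flag added to X86Subtarget above, together with the bypassSlowDivision() call wired into CodeGenPrepare::runOnFunction, amounts to one idea: on cores with a slow full-width divider (the commit targets Intel Atom via the bypass-type map), guard each 32-bit divide with a cheap width test and take a narrower divide when both operands fit. A minimal standalone sketch of the intended shape, assuming a 32-to-16-bit split; the helper name and the split width are illustrative, not what the pass literally emits:

#include <cstdint>
#include <cstdio>

// Sketch of the guarded divide bypassSlowDivision aims to produce:
// if neither operand has bits above the low 16, a 16-bit divide
// yields the same quotient and skips the slow full-width divider.
static uint32_t div32_bypassed(uint32_t a, uint32_t b) {
  if (((a | b) >> 16) == 0)
    return static_cast<uint16_t>(a) / static_cast<uint16_t>(b);
  return a / b; // slow path: full 32-bit divide
}

int main() {
  std::printf("%u %u\n", div32_bypassed(50000u, 250u), // fast path: 200
              div32_bypassed(1u << 20, 3u));           // slow path: 349525
  return 0;
}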
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8b1283ff25..25a1dd770a 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, SmallSetVector<Value*, 16> *ValueSet = 0) { SmallVector<Instruction*, 32> NowDeadInsts; @@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) { static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { uint64_t Size; - if (getObjectSize(V, Size, AA.getTargetData())) + if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo())) return Size; return AliasAnalysis::UnknownSize; } @@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst)) { + if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { MadeChange |= HandleFree(F); continue; } @@ -483,7 +484,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD); + DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -530,7 +531,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); + DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD); + DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); ++NumFastStores; MadeChange = true; @@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && + !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -724,7 +726,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -732,9 +735,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. 
- if (isInstructionTriviallyDead(BBI)) { + if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), + &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -750,7 +754,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI)) + if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 975954953b..26271133e6 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *Inst = I++; // Dead instructions should just be removed. - if (isInstructionTriviallyDead(Inst)) { + if (isInstructionTriviallyDead(Inst, TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); Inst->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 4822fd0944..bce43bbdae 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -632,6 +632,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) +#ifndef NDEBUG void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), @@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) { } errs() << "}\n"; } +#endif /// IsValueFullyAvailableInBlock - Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep @@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) || + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, @@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; @@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) { Value *SwitchCond = SI->getCondition(); BasicBlock *Parent = SI->getParent(); bool Changed = false; + + // Remember how many outgoing edges there are to every successor. + SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; + for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i) + ++SwitchEdges[SI->getSuccessor(i)]; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - BasicBlockEdge E(Parent, Dst); - if (E.isSingleEdge()) + // If there is only a single edge, propagate the case value into it. 
+ if (SwitchEdges.lookup(Dst) == 1) { + BasicBlockEdge E(Parent, Dst); Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + } } return Changed; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 37f8bdfbff..c933a178d7 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -44,6 +44,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -68,6 +69,7 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; SmallVector<WeakVH, 16> DeadInsts; bool Changed; @@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // new comparison. NewCompare->takeName(Compare); Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); + RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); // Delete the old floating point increment. Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); + RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); // If the FP induction variable still has uses, this is because something else // in the loop uses its value. In order to canonicalize the induction @@ -431,7 +433,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; } @@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (NumPreds == 1) { // Completely replace a single-pred PHI. This is safe, because the // NewVal won't be variant in the loop, so we don't need an LCSSA phi // node anymore. PN->replaceAllUsesWith(ExitVal); - RecursivelyDeleteTriviallyDeadInstructions(PN); + RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } } if (NumPreds != 1) { @@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DeadInsts.clear(); Changed = false; @@ -1763,7 +1766,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { while (!DeadInsts.empty()) if (Instruction *Inst = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); // The Rewriter may not be used from this point on. @@ -1772,7 +1775,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SinkUnusedInvariants(L); // Clean up dead instructions. - Changed |= DeleteDeadPHIs(L->getHeader()); + Changed |= DeleteDeadPHIs(L->getHeader(), TLI); // Check a post-condition. 
assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dd42c59059..20844c6cd5 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. - SimplifyInstructionsInBlock(NewBB, TD); + SimplifyInstructionsInBlock(NewBB, TD, TLI); // Threaded an edge! ++NumThreads; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0192e928fe..99bedce6c7 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -108,6 +108,9 @@ namespace { BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... + bool MayThrow; // The current loop contains an instruction which + // may throw, thus preventing code motion of + // instructions with side effects. DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. @@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { CurAST->add(*BB); // Incorporate the specified basic block } + MayThrow = false; + // TODO: We've already searched for instructions which may throw in subloops. + // We may want to reuse this information. + for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end(); + (BB != BBE) && !MayThrow ; ++BB) + for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); + (I != E) && !MayThrow; ++I) + MayThrow |= I->mayThrow(); + // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of @@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. - if (isInstructionTriviallyDead(&I)) { + if (isInstructionTriviallyDead(&I, TLI)) { DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); ++II; CurAST->deleteValue(&I); @@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { if (!FoundMod) return true; } - // FIXME: This should use mod/ref information to see if we can hoist or sink - // the call. + // FIXME: This should use mod/ref information to see if we can hoist or + // sink the call. return false; } - // Otherwise these instructions are hoistable/sinkable - return isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || - isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I); + // Only these instructions are hoistable/sinkable. 
+ bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || + isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || + isa<CmpInst>(I) || isa<InsertElementInst>(I) || + isa<ExtractElementInst>(I) || + isa<ShuffleVectorInst>(I)); + if (!HoistableKind) + return false; + + return isSafeToExecuteUnconditionally(I); } /// isNotUsedInLoop - Return true if the only users of this instruction are @@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { } bool LICM::isGuaranteedToExecute(Instruction &Inst) { + + // Somewhere in this loop there is an instruction which may throw and make us + // exit the loop. + if (MayThrow) + return false; + // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop // which does not execute this instruction, so we can't hoist it. diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ac1082cbfb..a72e288303 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast<Instruction>(Op)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) NowDeadInsts.push_back(OpI); } @@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { /// deleteIfDeadInstruction - If the specified value is a dead instruction, /// delete it and any recursively used instructions. -static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { +static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) { if (Instruction *I = dyn_cast<Instruction>(V)) - if (isInstructionTriviallyDead(I)) - deleteDeadInstruction(I, SE); + if (isInstructionTriviallyDead(I, TLI)) + deleteDeadInstruction(I, SE, TLI); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(BasePtr, *SE); + deleteIfDeadInstruction(BasePtr, *SE, TLI); return false; } @@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE, TLI); ++NumMemSet; return true; } @@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. 
- deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, StoreSize, getAnalysis<AliasAnalysis>(), SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. - deleteIfDeadInstruction(LoadBasePtr, *SE); - deleteIfDeadInstruction(StoreBasePtr, *SE); + deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); + deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); return false; } @@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE, TLI); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 982400c5a3..f5daa7b44e 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7eeb1527ad..abe07aa9d3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) { return false; BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) @@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) { if (!L->isLoopExiting(OrigHeader)) return false; - // Updating PHInodes in loops with multiple exits adds complexity. - // Keep it simple, and restrict loop rotation to loops with one exit only. - // In future, lift this restriction and support for multiple exits if - // required. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1) + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; // Check size of original header and reject loop if it is very big. @@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); - BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0 || OrigLatch == 0) + if (OrigPreheader == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes @@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) { if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. 
Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - // Since OrigPreheader now has the conditional branch to Exit block, it is - // the dominator of Exit. - DT->changeImmediateDominator(Exit, OrigPreheader); - DT->changeImmediateDominator(NewHeader, OrigPreheader); + // Everything that was dominated by the old loop header is now dominated + // by the original loop preheader. Conceptually the header was merged + // into the preheader, even though we reuse the actual block as a new + // loop latch. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); @@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); + + // Brute force incremental dominator tree update. Call + // findNearestCommonDominator on all CFG predecessors of each child of the + // original header. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + bool Changed; + do { + Changed = false; + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { + DomTreeNode *Node = HeaderChildren[I]; + BasicBlock *BB = Node->getBlock(); + + pred_iterator PI = pred_begin(BB); + BasicBlock *NearestDom = *PI; + for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + + // Remember if this changes the DomTree. + if (Node->getIDom()->getBlock() != NearestDom) { + DT->changeImmediateDominator(BB, NearestDom); + Changed = true; + } + } + + // If the dominator changed, this may have an effect on other + // predecessors, continue until we reach a fixpoint. + } while (Changed); } } @@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // emitted code isn't too gross in this common case. 
MergeBlockIntoPredecessor(OrigHeader, this); + DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0ae7a5151e..d7495da5ef 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } +#ifndef NDEBUG void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. @@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } +#ifndef NDEBUG void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } +#ifndef NDEBUG void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } +#ifndef NDEBUG void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes @@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } +#ifndef NDEBUG void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// GenerateCrossUseConstantOffsets - Look for registers which are a constant /// distance apart and try to form reuse opportunities between them. @@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } +#ifndef NDEBUG void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 3222f2083b..dce8e8beb6 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { // An ObjC-Identified object can't alias a load if it is never locally stored. if (AIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); if (BIsIdentified) { - // If both pointers have provenance, they can be directly compared. - if (A != B) - return false; - } else { - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); + // Both pointers are identified and escapes aren't an evident problem. + return false; } - } else { - if (BIsIdentified && isa<LoadInst>(A)) + } else if (BIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(A)) return isStoredObjCPointer(B); } @@ -1381,9 +1384,6 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// NestCount - The known minimum level of retain+release nesting. - unsigned NestCount; - /// KnownPositiveRefCount - True if the reference count is known to /// be incremented. 
    bool KnownPositiveRefCount;
@@ -1401,7 +1401,7 @@ namespace {
     /// TODO: Encapsulate this better.
     RRInfo RRI;
 
-    PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false),
+    PtrState() : KnownPositiveRefCount(false), Partial(false),
                  Seq(S_None) {}
 
     void SetKnownPositiveRefCount() {
@@ -1416,18 +1416,6 @@
       return KnownPositiveRefCount;
     }
 
-    void IncrementNestCount() {
-      if (NestCount != UINT_MAX) ++NestCount;
-    }
-
-    void DecrementNestCount() {
-      if (NestCount != 0) --NestCount;
-    }
-
-    bool IsKnownNested() const {
-      return NestCount > 0;
-    }
-
     void SetSeq(Sequence NewSeq) {
       Seq = NewSeq;
     }
@@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) {
   Seq = MergeSeqs(Seq, Other.Seq, TopDown);
   KnownPositiveRefCount = KnownPositiveRefCount &&
                           Other.KnownPositiveRefCount;
-  NestCount = std::min(NestCount, Other.NestCount);
 
   // We can't merge a plain objc_retain with an objc_retainBlock.
   if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
@@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
   return AutoreleaseCallee;
 }
 
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer, including tests which utilize AliasAnalysis.
+static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
+  // First make the rudimentary check.
+  if (!IsPotentialUse(Op))
+    return false;
+
+  // Objects in constant memory are not reference-counted.
+  if (AA.pointsToConstantMemory(Op))
+    return false;
+
+  // Pointers in constant memory do not point to reference-counted objects.
+  if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+    if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+      return false;
+
+  // Otherwise assume the worst.
+  return true;
+}
+
 /// CanAlterRefCount - Test whether the given instruction can result in a
 /// reference count modification (positive or negative) for the pointer's
 /// object.
@@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
     for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
          I != E; ++I) {
       const Value *Op = *I;
-      if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+      if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
         return true;
     }
     return false;
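The new overload above layers alias-analysis filtering on top of the rudimentary structural check. A condensed standalone sketch of the same filter, assuming only the AliasAnalysis::pointsToConstantMemory query; the function name and setup here are illustrative, not part of the patch:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // A pointer that provably refers to constant memory can never be a
    // reference-counted Objective-C object, so it is filtered out early.
    static bool isPlausiblyRefCounted(const Value *V, AliasAnalysis &AA) {
      if (AA.pointsToConstantMemory(V))
        return false;                     // the object itself is constant
      if (const LoadInst *LI = dyn_cast<LoadInst>(V))
        if (AA.pointsToConstantMemory(LI->getPointerOperand()))
          return false;                   // loaded from a constant slot
      return true;                        // conservatively assume the worst
    }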
@@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
     // Comparing a pointer with null, or any other constant, isn't really a use,
     // because we don't care what the pointer points to, or about the values
     // of any other dynamic reference-counted pointers.
-    if (!IsPotentialUse(ICI->getOperand(1)))
+    if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA()))
       return false;
   } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
     // For calls, just check the arguments (and not the callee operand).
     for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
          OE = CS.arg_end(); OI != OE; ++OI) {
       const Value *Op = *OI;
-      if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+      if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
         return true;
     }
     return false;
@@ -1936,14 +1943,14 @@
     const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
     // If we can't tell what the underlying object was, assume there is a
     // dependence.
-    return IsPotentialUse(Op) && PA.related(Op, Ptr);
+    return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr);
   }
 
   // Check each operand for a match.
   for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
        OI != OE; ++OI) {
     const Value *Op = *OI;
-    if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+    if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op))
       return true;
   }
   return false;
@@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
     S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
     S.RRI.ReleaseMetadata = ReleaseMetadata;
-    S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+    S.RRI.KnownSafe = S.IsKnownIncremented();
     S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
     S.RRI.Calls.insert(Inst);
 
-    S.IncrementNestCount();
+    S.SetKnownPositiveRefCount();
     break;
   }
   case IC_RetainBlock:
@@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
 
     PtrState &S = MyStates.getPtrBottomUpState(Arg);
     S.SetKnownPositiveRefCount();
-    S.DecrementNestCount();
 
     switch (S.GetSeq()) {
     case S_Stop:
@@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
 
   // Merge the states from each successor to compute the initial state
   // for the current block.
-  for (BBState::edge_iterator SI(MyStates.succ_begin()),
-       SE(MyStates.succ_end()); SI != SE; ++SI) {
+  BBState::edge_iterator SI(MyStates.succ_begin()),
+    SE(MyStates.succ_end());
+  if (SI != SE) {
     const BasicBlock *Succ = *SI;
     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
     assert(I != BBStates.end());
@@ -2760,7 +2767,6 @@
       assert(I != BBStates.end());
       MyStates.MergeSucc(I->second);
     }
-    break;
   }
 
   // Visit all the instructions, bottom-up.
@@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
       S.ResetSequenceProgress(S_Retain);
       S.RRI.IsRetainBlock = Class == IC_RetainBlock;
-      // Don't check S.IsKnownIncremented() here because it's not sufficient.
-      S.RRI.KnownSafe = S.IsKnownNested();
+      S.RRI.KnownSafe = S.IsKnownIncremented();
       S.RRI.Calls.insert(Inst);
     }
 
-    S.IncrementNestCount();
+    S.SetKnownPositiveRefCount();
 
     // A retain can be a potential use; proceed to the generic checking
     // code below.
@@ -2838,7 +2843,7 @@
     Arg = GetObjCArg(Inst);
 
     PtrState &S = MyStates.getPtrTopDownState(Arg);
-    S.DecrementNestCount();
+    S.ClearRefCount();
 
     switch (S.GetSeq()) {
     case S_Retain:
@@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
 
   // Merge the states from each predecessor to compute the initial state
   // for the current block.
-  for (BBState::edge_iterator PI(MyStates.pred_begin()),
-       PE(MyStates.pred_end()); PI != PE; ++PI) {
+  BBState::edge_iterator PI(MyStates.pred_begin()),
+    PE(MyStates.pred_end());
+  if (PI != PE) {
     const BasicBlock *Pred = *PI;
     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
     assert(I != BBStates.end());
@@ -2948,7 +2954,6 @@
       assert(I != BBStates.end());
       MyStates.MergePred(I->second);
     }
-    break;
   }
 
   // Visit all the instructions, top-down.
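Both VisitBottomUp and VisitTopDown now spell out the same dataflow shape explicitly: seed the block state from the first neighboring block, then merge in the remaining ones, instead of hiding that in a for loop that always breaks. A condensed sketch of the pattern, with the invented names stateFor/merge standing in for InitFromSucc/MergeSucc:

    // Initialize-then-merge over a range [SI, SE) of adjacent blocks.
    if (SI != SE) {
      BBState S = stateFor(*SI);      // seed from the first neighbor
      for (++SI; SI != SE; ++SI)
        S.merge(stateFor(*SI));       // conservatively combine the rest
      // S now holds the meet over all neighbors.
    }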
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index d13e4abff9..6d27db1b96 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// ChangeToUnreachable - Insert an unreachable instruction before the specified +/// changeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { } } -/// ChangeToCall - Convert the specified invoke into a normal call. -static void ChangeToCall(InvokeInst *II) { +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) { II->eraseFromParent(); } -static bool MarkAliveBlocks(BasicBlock *BB, +static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; @@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - ChangeToUnreachable(BBI, false); + changeToUnreachable(BBI, false); Changed = true; } break; @@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, true); + changeToUnreachable(SI, true); Changed = true; break; } @@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - ChangeToUnreachable(II, true); + changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else - ChangeToCall(II); + changeToCall(II); Changed = true; } } @@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -static bool RemoveUnreachableBlocksFromFn(Function &F) { +static bool removeUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = markAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... 
   if (Reachable.size() == F.size())
     return Changed;
@@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
   return true;
 }
 
-/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
 /// node) return blocks, merge them together to promote recursive block merging.
-static bool MergeEmptyReturnBlocks(Function &F) {
+static bool mergeEmptyReturnBlocks(Function &F) {
   bool Changed = false;
 
   BasicBlock *RetBlock = 0;
@@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) {
   return Changed;
 }
 
-/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function,
+/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
 /// iterating until no more changes are made.
-static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
+static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) {
   bool Changed = false;
   bool LocalChange = true;
   while (LocalChange) {
@@ -317,24 +317,24 @@
 //
 bool CFGSimplifyPass::runOnFunction(Function &F) {
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
-  bool EverChanged = RemoveUnreachableBlocksFromFn(F);
-  EverChanged |= MergeEmptyReturnBlocks(F);
-  EverChanged |= IterativeSimplifyCFG(F, TD);
+  bool EverChanged = removeUnreachableBlocksFromFn(F);
+  EverChanged |= mergeEmptyReturnBlocks(F);
+  EverChanged |= iterativelySimplifyCFG(F, TD);
 
   // If neither pass changed anything, we're done.
   if (!EverChanged) return false;
 
-  // IterativeSimplifyCFG can (rarely) make some loops dead.  If this happens,
-  // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should
+  // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens,
+  // removeUnreachableBlocksFromFn is needed to nuke them, which means we should
   // iterate between the two optimizations.  We structure the code like this to
-  // avoid reruning IterativeSimplifyCFG if the second pass of
-  // RemoveUnreachableBlocksFromFn doesn't do anything.
+  // avoid rerunning iterativelySimplifyCFG if the second pass of
+  // removeUnreachableBlocksFromFn doesn't do anything.
+  if (!removeUnreachableBlocksFromFn(F))
     return true;
 
   do {
-    EverChanged = IterativeSimplifyCFG(F, TD);
-    EverChanged |= RemoveUnreachableBlocksFromFn(F);
+    EverChanged = iterativelySimplifyCFG(F, TD);
+    EverChanged |= removeUnreachableBlocksFromFn(F);
   } while (EverChanged);
 
   return true;
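The driver above alternates the two transforms because each can expose work for the other: simplification can make a loop unreachable, and deleting unreachable blocks can enable further simplification. Schematically, the control flow reduces to a classic fixed-point loop (a simplified restatement, not the exact code):

    bool Changed;
    do {
      Changed = iterativelySimplifyCFG(F, TD);      // may kill loops
      Changed |= removeUnreachableBlocksFromFn(F);  // may enable merging
    } while (Changed);                              // stop at a fixed point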
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 3904419012..65311fe72d 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
@@ -38,6 +39,10 @@ using namespace llvm;
 STATISTIC(NumSimplified, "Number of library calls simplified");
 STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 
+static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
+                                    cl::init(false),
+                                    cl::desc("Enable unsafe double to float "
+                                             "shrinking for math lib calls"));
 //===----------------------------------------------------------------------===//
 // Optimizer Base Class
 //===----------------------------------------------------------------------===//
@@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization {
 //===----------------------------------------------------------------------===//
 
 //===---------------------------------------===//
-// 'cos*' Optimizations
+// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
+
+struct UnaryDoubleFPOpt : public LibCallOptimization {
+  bool CheckRetType;
+  UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {}
+  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy())
+      return 0;
+
+    if (CheckRetType) {
+      // Check whether all uses of functions like 'sin' are converted to float.
+      for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end();
+          ++UseI) {
+        FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI);
+        if (Cast == 0 || !Cast->getType()->isFloatTy())
+          return 0;
+      }
+    }
+
+    // If this is something like 'floor((double)floatval)', convert to floorf.
+    FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
+    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
+      return 0;
+
+    // floor((double)floatval) -> (double)floorf(floatval)
+    Value *V = Cast->getOperand(0);
+    V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
+    return B.CreateFPExt(V, B.getDoubleTy());
+  }
+};
+//===---------------------------------------===//
+// 'cos*' Optimizations
 
 struct CosOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    Value *Ret = NULL;
+    if (UnsafeFPShrink && Callee->getName() == "cos" &&
+        TLI->has(LibFunc::cosf)) {
+      UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+      Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B);
+    }
+
     FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 argument of FP type, which matches the
     // result type.
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); @@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization { BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); } - return 0; + return Ret; } }; @@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { @@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return 0; + if (Op2C == 0) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 @@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization { return CI; } - return 0; - } -}; - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - // If this is something like 'floor((double)floatval)', convert to floorf. 
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); + return Ret; } }; @@ -1534,7 +1571,8 @@ namespace { StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; // Integer Optimizations FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; ToAsciiOpt ToAscii; @@ -1547,10 +1585,13 @@ namespace { public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true) { + StpCpy(false), StpCpyChk(true), + UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); + void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + void InitOptimizations(); bool runOnFunction(Function &F); @@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { Optimizations[TLI->getName(F)] = Opt; } +void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. 
void SimplifyLibCalls::InitOptimizations() { @@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) - Optimizations["fabs"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) - Optimizations["floor"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) - Optimizations["ceil"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) - Optimizations["round"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) - Optimizations["rint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) - Optimizations["nearbyint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) - Optimizations["trunc"] = &UnaryDoubleFP; + AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d83145289c..1e6586bf0d 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#ifndef NDEBUG void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b933f6..75a7817563 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead. 
Also recursively delete any operands that become dead as
/// a result. This includes tracing the def-use list from the PHI to see if
/// it is ultimately unused or if it reaches an unused cycle.
-bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
+bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
   // Recursively deleting a PHI may cause multiple PHIs to be deleted
   // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete.
   SmallVector<WeakVH, 8> PHIs;
@@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
   bool Changed = false;
   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
     if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*()))
-      Changed |= RecursivelyDeleteDeadPHINode(PN);
+      Changed |= RecursivelyDeleteDeadPHINode(PN, TLI);
 
   return Changed;
 }
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 0000000000..b694779a53
--- /dev/null
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,251 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom, 32-bit divides are slow enough that at run time
+// it is profitable to check the values of the operands and, if they are
+// positive and less than 256, use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
+using namespace llvm;
+
+namespace llvm {
+  struct DivOpInfo {
+    bool SignedOp;
+    Value *Dividend;
+    Value *Divisor;
+
+    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+  };
+
+  struct DivPhiNodes {
+    PHINode *Quotient;
+    PHINode *Remainder;
+
+    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
+      : Quotient(InQuotient), Remainder(InRemainder) {}
+  };
+
+  template<>
+  struct DenseMapInfo<DivOpInfo> {
+    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+      return Val1.SignedOp == Val2.SignedOp &&
+             Val1.Dividend == Val2.Dividend &&
+             Val1.Divisor == Val2.Divisor;
+    }
+
+    static DivOpInfo getEmptyKey() {
+      return DivOpInfo(false, 0, 0);
+    }
+
+    static DivOpInfo getTombstoneKey() {
+      return DivOpInfo(true, 0, 0);
+    }
+
+    static unsigned getHashValue(const DivOpInfo &Val) {
+      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+             (unsigned)Val.SignedOp;
+    }
+  };
+
+  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
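Before reading the helpers that follow, it may help to see what the emitted control flow computes. Written as plain C++ rather than IR, for a 32-bit unsigned divide bypassed through an 8-bit type; this is a sketch of the transformation's effect, not code from the patch:

    #include <cstdint>

    uint32_t bypassedUDiv(uint32_t A, uint32_t B) {
      if (((A | B) & ~UINT32_C(0xFF)) == 0) {  // both operands fit in 8 bits
        uint8_t Q = uint8_t(A) / uint8_t(B);   // short, fast divide
        return Q;                              // zero-extended back to 32 bits
      }
      return A / B;                            // full-width, slow divide
    }

The signed case is guarded by the same unsigned test, so it takes the fast path only when both operands are positive and small enough for the bypass type.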
+
+// insertFastDiv - Replaces the div/rem instruction with code that checks the
+// value of the operands and uses a shorter, faster div/rem instruction when
+// possible, and the longer, slower div/rem instruction otherwise.
+static bool insertFastDiv(Function &F,
+                          Function::iterator &I,
+                          BasicBlock::iterator &J,
+                          IntegerType *BypassType,
+                          bool UseDivOp,
+                          bool UseSignedOp,
+                          DivCacheTy &PerBBDivCache) {
+  // Get instruction operands
+  Instruction *Instr = J;
+  Value *Dividend = Instr->getOperand(0);
+  Value *Divisor = Instr->getOperand(1);
+
+  if (isa<ConstantInt>(Divisor) ||
+      (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
+    // Operations with constant operands should already have been folded
+    // away at compile time.
+    return false;
+  }
+
+  // The basic block is split before the divide
+  BasicBlock *MainBB = I;
+  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+  ++I; // advance iterator I to SuccessorBB
+
+  // Add new basic block for slow divide operation
+  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  SlowBB->moveBefore(SuccessorBB);
+  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+  Value *SlowQuotientV;
+  Value *SlowRemainderV;
+  if (UseSignedOp) {
+    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+  } else {
+    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+  }
+  SlowBuilder.CreateBr(SuccessorBB);
+
+  // Add new basic block for fast divide operation
+  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  FastBB->moveBefore(SlowBB);
+  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
+                                                BypassType);
+  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
+                                                 BypassType);
+
+  // udiv/urem because the optimization only handles positive numbers; the
+  // quotient is not guaranteed to be exact, so use a plain udiv
+  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV,
+                                                 ShortDivisorV);
+  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+                                                  ShortDivisorV);
+  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                ShortQuotientV,
+                                                Dividend->getType());
+  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                 ShortRemainderV,
+                                                 Dividend->getType());
+  FastBuilder.CreateBr(SuccessorBB);
+
+  // Phi nodes for result of div and rem
+  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+  QuoPhi->addIncoming(FastQuotientV, FastBB);
+  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  RemPhi->addIncoming(SlowRemainderV, SlowBB);
+  RemPhi->addIncoming(FastRemainderV, FastBB);
+
+  // Replace Instr with appropriate phi node
+  if (UseDivOp)
+    Instr->replaceAllUsesWith(QuoPhi);
+  else
+    Instr->replaceAllUsesWith(RemPhi);
+  Instr->eraseFromParent();
+
+  // Combine operands into a single value with OR for value testing below
+  MainBB->getInstList().back().eraseFromParent();
+  IRBuilder<> MainBuilder(MainBB, MainBB->end());
+  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+  // BitMask is inverted to check if the operands are
+  // larger than the bypass type
+  uint64_t BitMask = ~BypassType->getBitMask();
+  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+  // Compare operand values and branch
+  Value *ZeroV = MainBuilder.getInt32(0);
+  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+  // Point iterator J at the first instruction of SuccessorBB
+  J = I->begin();
+
+  // Cache phi nodes to be used later in place of other instances
+  // of div or rem with the same sign, dividend, and divisor
+  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+  DivPhiNodes Value(QuoPhi, RemPhi);
+  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
+  return true;
+}
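The per-basic-block cache keyed by DivOpInfo is what lets a quotient and a remainder of the same operands share one fast/slow diamond. At the source level the effect looks like this (hypothetical input, not from the patch):

    // Both operations map to the same {SignedOp, Dividend, Divisor} key:
    uint32_t Q = A / B;  // first sight: insertFastDiv builds the diamond and
                         // caches both the quotient and remainder phi nodes
    uint32_t R = A % B;  // cache hit: reuseOrInsertFastDiv rewires this use
                         // to the cached remainder phi; no second diamond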
+
+// reuseOrInsertFastDiv - Reuses a previously computed quotient or remainder if
+// operands and operation are identical. Otherwise calls insertFastDiv to
+// perform the optimization and caches the resulting quotient and remainder.
+static bool reuseOrInsertFastDiv(Function &F,
+                                 Function::iterator &I,
+                                 BasicBlock::iterator &J,
+                                 IntegerType *BypassType,
+                                 bool UseDivOp,
+                                 bool UseSignedOp,
+                                 DivCacheTy &PerBBDivCache) {
+  // Get instruction operands
+  Instruction *Instr = J;
+  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+  DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
+
+  if (CacheI == PerBBDivCache.end()) {
+    // If previous instance does not exist, insert fast div
+    return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
+                         PerBBDivCache);
+  }
+
+  // Replace operation value with previously generated phi node
+  DivPhiNodes &Value = CacheI->second;
+  if (UseDivOp) {
+    // Replace all uses of div instruction with quotient phi node
+    J->replaceAllUsesWith(Value.Quotient);
+  } else {
+    // Replace all uses of rem instruction with remainder phi node
+    J->replaceAllUsesWith(Value.Remainder);
+  }
+
+  // Advance to next operation
+  ++J;
+
+  // Remove redundant operation
+  Instr->eraseFromParent();
+  return true;
+}
+
+// bypassSlowDivision - This optimization identifies DIV instructions that can
+// be profitably bypassed and carried out with a shorter, faster divide.
+bool bypassSlowDivision(Function &F,
+                        Function::iterator &I,
+                        const llvm::DenseMap<Type *, Type *> &BypassTypeMap) {
+  DivCacheTy DivCache;
+
+  bool MadeChange = false;
+  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+
+    // Get instruction details
+    unsigned Opcode = J->getOpcode();
+    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
+    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
+    bool UseSignedOp = Opcode == Instruction::SDiv ||
+                       Opcode == Instruction::SRem;
+
+    // Only optimize div or rem ops
+    if (!UseDivOp && !UseRemOp)
+      continue;
+
+    // Continue if div/rem type is not bypassed
+    DenseMap<Type *, Type *>::const_iterator BT =
+      BypassTypeMap.find(J->getType());
+    if (BT == BypassTypeMap.end())
+      continue;
+
+    IntegerType *BypassType = cast<IntegerType>(BT->second);
+    MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
+                                       UseSignedOp, DivCache);
+  }
+
+  return MadeChange;
+}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 4ff31cae62..215a16ff3f 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils
   BasicBlockUtils.cpp
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
+  BypassSlowDivision.cpp
   CloneFunction.cpp
   CloneModule.cpp
   CmpInstAnalysis.cpp
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bed7d72fff..0601433565 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -52,7 +52,8 @@ using namespace llvm;
 /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch
 /// conditions and indirectbr addresses this might make dead if
 /// DeleteDeadConditions is true.
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); @@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. 
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast<Instruction>(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7cdda..3df309958b 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -101,14 +103,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. 
BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +120,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +137,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +172,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) @@ -222,7 +224,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +254,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +347,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -411,7 +413,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, const TargetData *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +422,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. 
if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +469,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +486,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -615,6 +617,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -634,7 +639,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa<BranchInst>(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant*, 16> DeadCases; @@ -673,7 +678,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -729,8 +734,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -738,6 +743,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Tries to get a branch weight for the given instruction, returns NULL if it +/// can't. Pos starts at 0. 
+static ConstantInt* GetWeight(Instruction* I, int Pos) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) { + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) { + if (MDS->getString().equals("branch_weights")) { + assert(ProfMD->getNumOperands() >= 3); + return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos)); + } + } + } + + return 0; +} + +/// Scale the given weights based on the successor TI's metadata. Scaling is +/// done by multiplying every weight by the sum of the successor's weights. +static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) { + // Sum the successor's weights + assert(HasBranchWeights(STI)); + unsigned Scale = 0; + MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); + for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i)); + assert(CI); + Scale += CI->getValue().getZExtValue(); + } + + // Skip default, as it's replaced during the folding + for (unsigned i = 1; i < Weights.size(); ++i) { + Weights[i] *= Scale; + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +836,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector<BasicBlock*, 8> NewSuccessors; + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + uint64_t PredDefaultWeight = 0; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If the predecessor is a conditional eq, then swap the default weight + // to be the first entry. 
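// Worked example with illustrative numbers (not from the patch): let the
// predecessor branch carry !prof weights {default: 5, case 7: 3} and the
// successor switch carry {default: 4, case 1: 1, case 2: 1}, summing to 6.
// When the blocks are folded below, ScaleWeights multiplies the surviving
// predecessor case weight by that sum (3 * 6 = 18), the merged default
// becomes PredDefault * SuccDefault (5 * 4 = 20), and each case inherited
// from the successor gets PredDefault times its own weight (5 * 1 = 5), so
// the merged terminator keeps both terminators' relative probabilities.
// FitWeights then halves every entry if any product exceeds UINT_MAX.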
+ if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + std::swap(Weights.front(), Weights.back()); + } + } + + PredDefaultWeight = Weights.front(); + } else if (SuccHasWeights) { + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + PredDefaultWeight = 1; + } + + uint64_t SuccDefaultWeight = 0; + if (SuccHasWeights) { + int Index = 0; + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition()); + assert(ICI); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Index = 1; + } + + SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); + } + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +895,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights) { + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,14 +911,35 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + if (SuccHasWeights) { + ScaleWeights(TI, Weights); + Weights.front() *= SuccDefaultWeight; + } else if (PredHasWeights) { + Weights.front() /= (1 + BBCases.size()); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights) { + Weights.push_back(PredDefaultWeight * + GetWeight(TI, i)->getValue().getZExtValue()); + } else if (PredHasWeights) { + // Split the old default's weight amongst the children + assert(PredDefaultWeight != 0); + Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + } } } else { + // FIXME: preserve branch weight metadata, similarly to the 'then' + // above. For now, drop it. + PredHasWeights = false; + SuccHasWeights = false; + // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. @@ -822,7 +964,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); @@ -851,6 +993,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). 
+ createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. If BB is still a successor of PSI, then we must @@ -984,11 +1137,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1056,7 +1209,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1265,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1315,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1329,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB || isa<PHINode>(U)) return false; } - + // Looks ok, continue checking. } @@ -1194,31 +1347,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. 
Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1380,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1397,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1405,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1297,7 +1450,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa<ConstantInt>(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1460,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1475,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1495,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa<BinaryOperator>(PN->getIncomingValue(1)) || isa<BinaryOperator>(IfCond))) return false; - + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1362,7 +1515,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1528,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! 
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder<true, NoFolder> Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1547,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1573,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1600,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1613,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1625,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. 
TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1644,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1600,7 +1753,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1776,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } @@ -1631,13 +1784,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1802,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1812,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1846,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst @@ -1707,47 +1860,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallVector<std::pair<Value*, unsigned>, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. 
The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair<Value*, unsigned> Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +1909,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1764,9 +1917,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); @@ -1806,7 +1959,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +1967,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast<Instruction>(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1921,7 +2074,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +2089,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +2098,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. 
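// Source-level illustration (hypothetical input) for FoldBranchToCommonDest,
// whose implementation ends just above: a short-circuit chain such as
void f(void);
void g(int a, int b) {
  if (a == 0 || b == 0)  // two cond branches sharing a destination...
    f();                 // ...are refolded into one branch on a bitwise |
}
// At most one side-effect-free "bonus" instruction may be speculated into
// the predecessor along with the second compare.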
bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +2122,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessors is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +2144,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2160,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2199,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder<true, NoFolder> Builder(PBI); @@ -2065,16 +2218,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. 
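// Source-level illustration (hypothetical input) of the same-condition case
// handled above: when a block's branch repeats its lone predecessor's
// condition, the inner test folds to a branch on constant.
void step1(void);
void step2(void);
void h(int c) {
  if (c) {
    step1();
    if (c)     // same condition, single predecessor: folded to "br true"
      step2();
  }
}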
PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make @@ -2092,10 +2245,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2214,7 +2367,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, @@ -2228,17 +2381,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; - + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2399,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2407,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2417,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2297,7 +2450,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry. 
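// Source-level illustration (hypothetical input): the switch-default
// pattern rewritten above. The compare in the default arm is folded into
// the switch as an extra case whose new edge feeds the phi directly:
int classify(int x) {
  switch (x) {
  case 1: case 2: case 3:
    return 1;
  default:
    return x == 4 ? 1 : 0;  // becomes "case 4:" plus true/false phi entries
  }
}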
Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2313,8 +2466,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2476,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2485,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply compared value, fail. if (CompVal == 0) return false; @@ -2344,21 +2497,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2528,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2392,14 +2548,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2566,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. 
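// Source-level illustration (hypothetical input): the kind of equality
// chain that SimplifyBranchOnICmpChain, finished above, rebuilds as a
// switch with cases 'a', 'e', 'i' and 'o'. A leftover non-constant test
// would be split into its own preceding branch (the ExtraCase path).
bool isVowelStart(int c) {
  return c == 'a' || c == 'e' || c == 'i' || c == 'o';
}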
EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2623,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; @@ -2481,7 +2637,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2646,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2672,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2714,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2771,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2783,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa<PHINode>(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2804,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. 
        BranchInst *BI = Builder.CreateBr(II->getNormalDest());
        II->removeFromParent();   // Take out of symbol table
-
+
        // Insert the call now...
        SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
        Builder.SetInsertPoint(BI);
@@ -2663,7 +2819,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       }
     }
   }
-
+
   // If this block is now dead, remove it.
   if (pred_begin(BB) == pred_end(BB) &&
       BB != &BB->getParent()->getEntryBlock()) {
@@ -2823,6 +2979,288 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   return Changed;
 }
 
+/// ValidLookupTableConstant - Return true if the backend will be able to handle
+/// initializing an array of constants like C.
+static bool ValidLookupTableConstant(Constant *C) {
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    return CE->isGEPWithNoNotionalOverIndexing();
+
+  return isa<ConstantFP>(C) ||
+    isa<ConstantInt>(C) ||
+    isa<ConstantPointerNull>(C) ||
+    isa<GlobalValue>(C) ||
+    isa<UndefValue>(C);
+}
+
+/// GetCaseResults - Try to determine the resulting constant values in phi
+/// nodes at the common destination basic block for one of the case
+/// destinations of a switch instruction.
+static bool GetCaseResults(SwitchInst *SI,
+                           BasicBlock *CaseDest,
+                           BasicBlock **CommonDest,
+                           SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
+  // The block from which we enter the common destination.
+  BasicBlock *Pred = SI->getParent();
+
+  // If CaseDest is empty, continue to its successor.
+  if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() &&
+      !isa<PHINode>(CaseDest->begin())) {
+
+    TerminatorInst *Terminator = CaseDest->getTerminator();
+    if (Terminator->getNumSuccessors() != 1)
+      return false;
+
+    Pred = CaseDest;
+    CaseDest = Terminator->getSuccessor(0);
+  }
+
+  // If we did not have a CommonDest before, use the current one.
+  if (!*CommonDest)
+    *CommonDest = CaseDest;
+  // If the destination isn't the common one, abort.
+  if (CaseDest != *CommonDest)
+    return false;
+
+  // Get the values for this case from phi nodes in the destination block.
+  BasicBlock::iterator I = (*CommonDest)->begin();
+  while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+    int Idx = PHI->getBasicBlockIndex(Pred);
+    if (Idx == -1)
+      continue;
+
+    Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
+    if (!ConstVal)
+      return false;
+
+    // Be conservative about which kinds of constants we support.
+    if (!ValidLookupTableConstant(ConstVal))
+      return false;
+
+    Res.push_back(std::make_pair(PHI, ConstVal));
+  }
+
+  return true;
+}
+
+/// BuildLookupTable - Build a lookup table with the contents of Results, using
+/// DefaultResult to fill the holes in the table. If the table ends up
+/// containing the same result in each element, set *SingleResult to that value
+/// and return NULL.
+static GlobalVariable *BuildLookupTable(
+    Module &M,
+    uint64_t TableSize,
+    ConstantInt *Offset,
+    const std::vector<std::pair<ConstantInt*,Constant*> >& Results,
+    Constant *DefaultResult,
+    Constant **SingleResult) {
+  assert(Results.size() && "Need values to build lookup table");
+  assert(TableSize >= Results.size() && "Table needs to hold all values");
+
+  // If all values in the table are equal, this is that value.
+  Constant *SameResult = Results.begin()->second;
+
+  // Build up the table contents.
+  std::vector<Constant*> TableContents(TableSize);
+  for (size_t I = 0, E = Results.size(); I != E; ++I) {
+    ConstantInt *CaseVal = Results[I].first;
+    Constant *CaseRes = Results[I].second;
+
+    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
+    TableContents[Idx] = CaseRes;
+
+    if (CaseRes != SameResult)
+      SameResult = NULL;
+  }
+
+  // Fill in any holes in the table with the default result.
+  if (Results.size() < TableSize) {
+    for (unsigned i = 0; i < TableSize; ++i) {
+      if (!TableContents[i])
+        TableContents[i] = DefaultResult;
+    }
+
+    if (DefaultResult != SameResult)
+      SameResult = NULL;
+  }
+
+  // Same result was used in the entire table; just return that.
+  if (SameResult) {
+    *SingleResult = SameResult;
+    return NULL;
+  }
+
+  ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
+  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
+
+  GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
+                                          GlobalVariable::PrivateLinkage,
+                                          Initializer,
+                                          "switch.table");
+  GV->setUnnamedAddr(true);
+  return GV;
+}
+
+/// SwitchToLookupTable - If the switch is only used to initialize one or more
+/// phi nodes in a common successor block with different constant values,
+/// replace the switch with lookup tables.
+static bool SwitchToLookupTable(SwitchInst *SI,
+                                IRBuilder<> &Builder) {
+  assert(SI->getNumCases() > 1 && "Degenerate switch?");
+  // FIXME: Handle unreachable cases.
+
+  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
+  // split off a dense part and build a lookup table for that.
+
+  // FIXME: If the results are all integers and the lookup table would fit in a
+  // target-legal register, we should store them as a bitmap and use shift/mask
+  // to look up the result.
+
+  // FIXME: This creates arrays of GEPs to constant strings, which means each
+  // GEP needs a runtime relocation in PIC code. We should just build one big
+  // string and lookup indices into that.
+
+  // Ignore the switch if the number of cases is too small.
+  // This is similar to the check when building jump tables in
+  // SelectionDAGBuilder::handleJTSwitchCase.
+  // FIXME: Determine the best cut-off.
+  if (SI->getNumCases() < 4)
+    return false;
+
+  // Figure out the corresponding result for each case value and phi node in
+  // the common destination, as well as the min and max case values.
+  assert(SI->case_begin() != SI->case_end());
+  SwitchInst::CaseIt CI = SI->case_begin();
+  ConstantInt *MinCaseVal = CI.getCaseValue();
+  ConstantInt *MaxCaseVal = CI.getCaseValue();
+
+  BasicBlock *CommonDest = NULL;
+  typedef std::vector<std::pair<ConstantInt*, Constant*> > ResultListTy;
+  SmallDenseMap<PHINode*, ResultListTy> ResultLists;
+  SmallDenseMap<PHINode*, Constant*> DefaultResults;
+  SmallDenseMap<PHINode*, Type*> ResultTypes;
+  SmallVector<PHINode*, 4> PHIs;
+
+  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
+    ConstantInt *CaseVal = CI.getCaseValue();
+    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
+      MinCaseVal = CaseVal;
+    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
+      MaxCaseVal = CaseVal;
+
+    // Resulting value at phi nodes for this case value.
+    typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
+    ResultsTy Results;
+    if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results))
+      return false;
+
+    // Append the result from this case to the list for each phi.
+    for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
+      if (!ResultLists.count(I->first))
+        PHIs.push_back(I->first);
+      ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
+    }
+  }
+
+  // Get the resulting values for the default case.
+  {
+    SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
+    if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList))
+      return false;
+    for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
+      PHINode *PHI = DefaultResultsList[I].first;
+      Constant *Result = DefaultResultsList[I].second;
+      DefaultResults[PHI] = Result;
+      ResultTypes[PHI] = Result->getType();
+    }
+  }
+
+  APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
+  // The table density should be at least 40%. This is the same criterion as for
+  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+  // FIXME: Find the best cut-off.
+  // Be careful to avoid overflow in the density computation.
+  if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
+    return false;
+  uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
+  if (SI->getNumCases() * 10 < TableSize * 4)
+    return false;
+
+  // Build the lookup tables.
+  SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
+  SmallDenseMap<PHINode*, Constant*> SingleResults;
+
+  Module &Mod = *CommonDest->getParent()->getParent();
+  for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
+       I != E; ++I) {
+    PHINode *PHI = *I;
+
+    Constant *SingleResult = NULL;
+    LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal,
+                                         ResultLists[PHI], DefaultResults[PHI],
+                                         &SingleResult);
+    SingleResults[PHI] = SingleResult;
+  }
+
+  // Create the BB that does the lookups.
+  BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
+                                            "switch.lookup",
+                                            CommonDest->getParent(),
+                                            CommonDest);
+
+  // Check whether the condition value is within the case range, and branch to
+  // the new BB.
+  Builder.SetInsertPoint(SI);
+  Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
+                                        "switch.tableidx");
+  Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
+      MinCaseVal->getType(), TableSize));
+  Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
+
+  // Populate the BB that does the lookups.
+  Builder.SetInsertPoint(LookupBB);
+  bool ReturnedEarly = false;
+  for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
+       I != E; ++I) {
+    PHINode *PHI = *I;
+    // There was a single result for this phi; just use that.
+    if (Constant *SingleResult = SingleResults[PHI]) {
+      PHI->addIncoming(SingleResult, LookupBB);
+      continue;
+    }
+
+    Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
+    Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
+                                           "switch.gep");
+    Value *Result = Builder.CreateLoad(GEP, "switch.load");
+
+    // If the result is only going to be used to return from the function,
+    // we want to do that right here.
+    if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) {
+      if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) {
+        Builder.CreateRet(Result);
+        ReturnedEarly = true;
+      }
+    }
+
+    if (!ReturnedEarly)
+      PHI->addIncoming(Result, LookupBB);
+  }
+
+  if (!ReturnedEarly)
+    Builder.CreateBr(CommonDest);
+
+  // Remove the switch.
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -2862,13 +3300,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +3320,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +3328,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +3345,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) @@ -2921,7 +3362,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +3375,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +3384,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. 
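// End-to-end illustration (hypothetical input): a dense switch whose arms
// only pick constants, exactly the shape SwitchToLookupTable now turns into
// a bounds check plus a load from a private "switch.table" array:
int daysIn(int month) {
  switch (month) {
  case 1: return 31;
  case 2: return 28;
  case 3: return 31;
  case 4: return 30;
  case 5: return 31;
  default: return 0;  // the default value also fills holes in the table
  }
}
// Five contiguous cases satisfy both the ">= 4 cases" and the 40% density
// checks, so the returns collapse to a range test plus table[month - 1].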
BasicBlock::iterator I = BB->begin(); @@ -2962,17 +3403,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +3440,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -3114,7 +3555,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0f8a..528e6a1e42 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 62d23cb948..c09dcd2ea9 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -601,7 +601,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. - (void) SimplifyInstructionsInBlock(&BB, TD); + (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); return true; } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index c09c69b6e8..f3f24ae5c8 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1029,6 +1029,9 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, Out << "sideeffect "; if (IA->isAlignStack()) Out << "alignstack "; + // We don't emit the AD_ATT dialect as it's the assumed default. 
+ if (IA->getDialect() == InlineAsm::AD_Intel) + Out << "inteldialect "; Out << '"'; PrintEscapedString(IA->getAsmString(), Out); Out << "\", \""; diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index c8219eb787..d466ac60b2 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -88,9 +88,6 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); Result += " "; } - if (Attrs & Attribute::IANSDialect) - Result += "ia_nsdialect "; - // Trim the trailing space. assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 6a20be6f34..c17e79454e 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -42,7 +42,7 @@ add_llvm_library(LLVMCore # Workaround: It takes over 20 minutes to compile with msvc10. # FIXME: Suppressing optimizations to core libraries would not be good thing. -if( MSVC_VERSION EQUAL 1600 ) +if( MSVC_VERSION LESS 1700 ) set_property( SOURCE Function.cpp PROPERTY COMPILE_FLAGS "/Og-" diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 8903a8f40f..0f81b3ee4e 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -352,18 +352,21 @@ struct ExprMapKeyType { struct InlineAsmKeyType { InlineAsmKeyType(StringRef AsmString, StringRef Constraints, bool hasSideEffects, - bool isAlignStack) + bool isAlignStack, InlineAsm::AsmDialect asmDialect) : asm_string(AsmString), constraints(Constraints), - has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {} + has_side_effects(hasSideEffects), is_align_stack(isAlignStack), + asm_dialect(asmDialect) {} std::string asm_string; std::string constraints; bool has_side_effects; bool is_align_stack; + InlineAsm::AsmDialect asm_dialect; bool operator==(const InlineAsmKeyType& that) const { return this->asm_string == that.asm_string && this->constraints == that.constraints && this->has_side_effects == that.has_side_effects && - this->is_align_stack == that.is_align_stack; + this->is_align_stack == that.is_align_stack && + this->asm_dialect == that.asm_dialect; } bool operator<(const InlineAsmKeyType& that) const { if (this->asm_string != that.asm_string) @@ -374,6 +377,8 @@ struct InlineAsmKeyType { return this->has_side_effects < that.has_side_effects; if (this->is_align_stack != that.is_align_stack) return this->is_align_stack < that.is_align_stack; + if (this->asm_dialect != that.asm_dialect) + return this->asm_dialect < that.asm_dialect; return false; } @@ -490,7 +495,8 @@ template<> struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> { static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) { return new InlineAsm(Ty, Key.asm_string, Key.constraints, - Key.has_side_effects, Key.is_align_stack); + Key.has_side_effects, Key.is_align_stack, + Key.asm_dialect); } }; @@ -499,7 +505,8 @@ struct ConstantKeyData<InlineAsm> { typedef InlineAsmKeyType ValType; static ValType getValType(InlineAsm *Asm) { return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(), - Asm->hasSideEffects(), Asm->isAlignStack()); + Asm->hasSideEffects(), Asm->isAlignStack(), + Asm->getDialect()); } }; diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index a56f1b282b..ab27f9b8b7 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1055,9 +1055,11 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, 
                                const char *AsmString,
                                const char *Constraints,
                                LLVMBool HasSideEffects,
-                               LLVMBool IsAlignStack) {
+                               LLVMBool IsAlignStack,
+                               InlineAsm::AsmDialect AsmDialect) {
   return wrap(InlineAsm::get(dyn_cast<FunctionType>(unwrap(Ty)), AsmString,
-                             Constraints, HasSideEffects, IsAlignStack));
+                             Constraints, HasSideEffects, IsAlignStack,
+                             AsmDialect));
 }
 
 LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp
index 003a5d4e4e..5bc1ac9f5d 100644
--- a/lib/VMCore/GCOV.cpp
+++ b/lib/VMCore/GCOV.cpp
@@ -28,19 +28,19 @@ GCOVFile::~GCOVFile() {
 }
 
 /// isGCDAFile - Return true if Format identifies a .gcda file.
-static bool isGCDAFile(GCOVFormat Format) {
-  return Format == GCDA_402 || Format == GCDA_404;
+static bool isGCDAFile(GCOV::GCOVFormat Format) {
+  return Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404;
 }
 
 /// isGCNOFile - Return true if Format identifies a .gcno file.
-static bool isGCNOFile(GCOVFormat Format) {
-  return Format == GCNO_402 || Format == GCNO_404;
+static bool isGCNOFile(GCOV::GCOVFormat Format) {
+  return Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404;
 }
 
 /// read - Read GCOV buffer.
 bool GCOVFile::read(GCOVBuffer &Buffer) {
-  GCOVFormat Format = Buffer.readGCOVFormat();
-  if (Format == InvalidGCOV)
+  GCOV::GCOVFormat Format = Buffer.readGCOVFormat();
+  if (Format == GCOV::InvalidGCOV)
     return false;
 
   unsigned i = 0;
@@ -87,21 +87,21 @@ GCOVFunction::~GCOVFunction() {
 
 /// read - Read a function from the buffer. Return false if buffer cursor
 /// does not point to a function tag.
-bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) {
+bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) {
   if (!Buff.readFunctionTag())
     return false;
 
   Buff.readInt(); // Function header length
   Ident = Buff.readInt();
   Buff.readInt(); // Checksum #1
-  if (Format != GCNO_402)
+  if (Format != GCOV::GCNO_402)
     Buff.readInt(); // Checksum #2
 
   Name = Buff.readString();
-  if (Format == GCNO_402 || Format == GCNO_404)
+  if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404)
     Filename = Buff.readString();
 
-  if (Format == GCDA_402 || Format == GCDA_404) {
+  if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) {
     Buff.readArcTag();
     uint32_t Count = Buff.readInt() / 2;
     for (unsigned i = 0, e = Count; i != e; ++i) {
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 736e370a6d..2e636aacfd 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -27,19 +27,20 @@ InlineAsm::~InlineAsm() {
 
 InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString,
                           StringRef Constraints, bool hasSideEffects,
-                          bool isAlignStack) {
-  InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack);
+                          bool isAlignStack, AsmDialect asmDialect) {
+  InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack,
+                       asmDialect);
   LLVMContextImpl *pImpl = Ty->getContext().pImpl;
   return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
 }
 
 InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString,
                      const std::string &constraints, bool hasSideEffects,
-                     bool isAlignStack)
+                     bool isAlignStack, AsmDialect asmDialect)
   : Value(Ty, Value::InlineAsmVal),
-    AsmString(asmString),
-    Constraints(constraints), HasSideEffects(hasSideEffects),
-    IsAlignStack(isAlignStack) {
+    AsmString(asmString), Constraints(constraints),
+    HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
+    Dialect(asmDialect) {
 
   // Do various checks on the constraint string and type.
assert(Verify(getFunctionType(), constraints) && diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 4530c0495f..53f11499e4 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1189,7 +1189,7 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, assert(PassDebugging >= Details); if (Set.empty()) return; - dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; + dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; for (unsigned i = 0; i != Set.size(); ++i) { if (i) dbgs() << ','; const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]); diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt index dac637335b..c4567a35b0 100644 --- a/projects/CMakeLists.txt +++ b/projects/CMakeLists.txt @@ -10,3 +10,11 @@ foreach(entry ${entries}) endif() endif() endforeach(entry) + +# Also add in the compiler-rt tree if present and we have a sufficiently +# recent version of CMake. +if(${CMAKE_VERSION} VERSION_GREATER 2.8.7 AND + IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt AND + EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/compiler-rt/CMakeLists.txt) + add_subdirectory(compiler-rt) +endif() diff --git a/runtime/libprofile/CMakeLists.txt b/runtime/libprofile/CMakeLists.txt index 414ad00b4a..8609715b33 100644 --- a/runtime/libprofile/CMakeLists.txt +++ b/runtime/libprofile/CMakeLists.txt @@ -13,7 +13,8 @@ set_target_properties( profile_rt-static PROPERTIES OUTPUT_NAME "profile_rt" ) -add_llvm_loadable_module( profile_rt-shared ${SOURCES} ) +set(BUILD_SHARED_LIBS ON) +add_llvm_library( profile_rt-shared ${SOURCES} ) set_target_properties( profile_rt-shared PROPERTIES OUTPUT_NAME "profile_rt" ) diff --git a/runtime/libprofile/Profiling.h b/runtime/libprofile/Profiling.h index c6b9a4d71c..acc6399a18 100644 --- a/runtime/libprofile/Profiling.h +++ b/runtime/libprofile/Profiling.h @@ -15,7 +15,7 @@ #ifndef PROFILING_H #define PROFILING_H -#include "llvm/Analysis/ProfileInfoTypes.h" /* for enum ProfilingType */ +#include "llvm/Analysis/ProfileDataTypes.h" /* for enum ProfilingType */ /* save_arguments - Save argc and argv as passed into the program for the file * we output. diff --git a/test/Analysis/BasicAA/noalias-geps.ll b/test/Analysis/BasicAA/noalias-geps.ll new file mode 100644 index 0000000000..a93d778da0 --- /dev/null +++ b/test/Analysis/BasicAA/noalias-geps.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +; Check that geps with equal base offsets of noalias base pointers stay noalias. +define i32 @test(i32* %p, i16 %i) { + %pi = getelementptr i32* %p, i32 0 + %pi.next = getelementptr i32* %p, i32 1 + %b = icmp eq i16 %i, 0 + br i1 %b, label %bb1, label %bb2 + +bb1: + %f = getelementptr i32* %pi, i32 1 + %g = getelementptr i32* %pi.next, i32 1 + br label %bb3 +bb2: + %f2 = getelementptr i32* %pi, i32 1 + %g2 = getelementptr i32* %pi.next, i32 1 + br label %bb3 + +bb3: + %ptr_phi = phi i32* [ %f, %bb1 ], [ %f2, %bb2 ] + %ptr_phi2 = phi i32* [ %g, %bb1 ], [ %g2, %bb2 ] +; CHECK: NoAlias: i32* %f1, i32* %g1 + %f1 = getelementptr i32* %ptr_phi , i32 1 + %g1 = getelementptr i32* %ptr_phi2 , i32 1 + +ret i32 0 +} + +; Check that geps with equal indices of noalias base pointers stay noalias. 
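+; (Editor's note, illustrative: unlike @test above, the trailing gep index
+; %i below is a runtime value rather than a constant, so the point being
+; exercised is that applying identical indices to two already-noalias base
+; pointers must preserve the noalias result.)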
+define i32 @test2([2 x i32]* %p, i32 %i) { + %pi = getelementptr [2 x i32]* %p, i32 0 + %pi.next = getelementptr [2 x i32]* %p, i32 1 + %b = icmp eq i32 %i, 0 + br i1 %b, label %bb1, label %bb2 + +bb1: + %f = getelementptr [2 x i32]* %pi, i32 1 + %g = getelementptr [2 x i32]* %pi.next, i32 1 + br label %bb3 +bb2: + %f2 = getelementptr [2 x i32]* %pi, i32 1 + %g2 = getelementptr [2 x i32]* %pi.next, i32 1 + br label %bb3 +bb3: + %ptr_phi = phi [2 x i32]* [ %f, %bb1 ], [ %f2, %bb2 ] + %ptr_phi2 = phi [2 x i32]* [ %g, %bb1 ], [ %g2, %bb2 ] +; CHECK: NoAlias: i32* %f1, i32* %g1 + %f1 = getelementptr [2 x i32]* %ptr_phi , i32 1, i32 %i + %g1 = getelementptr [2 x i32]* %ptr_phi2 , i32 1, i32 %i + +ret i32 0 +} diff --git a/test/Analysis/BasicAA/phi-speculation.ll b/test/Analysis/BasicAA/phi-speculation.ll new file mode 100644 index 0000000000..21c6592986 --- /dev/null +++ b/test/Analysis/BasicAA/phi-speculation.ll @@ -0,0 +1,33 @@ +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; ptr_phi and ptr2_phi do not alias. +; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi + +define i32 @test_noalias(i32* %ptr2, i32 %count, i32* %coeff) { +entry: + %ptr = getelementptr inbounds i32* %ptr2, i64 1 + br label %while.body + +while.body: + %num = phi i32 [ %count, %entry ], [ %dec, %while.body ] + %ptr_phi = phi i32* [ %ptr, %entry ], [ %ptr_inc, %while.body ] + %ptr2_phi = phi i32* [ %ptr2, %entry ], [ %ptr2_inc, %while.body ] + %result.09 = phi i32 [ 0 , %entry ], [ %add, %while.body ] + %dec = add nsw i32 %num, -1 + %0 = load i32* %ptr_phi, align 4 + store i32 %0, i32* %ptr2_phi, align 4 + %1 = load i32* %coeff, align 4 + %2 = load i32* %ptr_phi, align 4 + %mul = mul nsw i32 %1, %2 + %add = add nsw i32 %mul, %result.09 + %tobool = icmp eq i32 %dec, 0 + %ptr_inc = getelementptr inbounds i32* %ptr_phi, i64 1 + %ptr2_inc = getelementptr inbounds i32* %ptr2_phi, i64 1 + br i1 %tobool, label %the_exit, label %while.body + +the_exit: + ret i32 %add +} diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index 74d06a18f7..08adfa8a36 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -88,3 +88,30 @@ exit: } !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} + +define i32 @test4(i32 %x) nounwind uwtable readnone ssp { +; CHECK: Printing analysis {{.*}} for function 'test4' +entry: + %conv = sext i32 %x to i64 + switch i64 %conv, label %return [ + i64 0, label %sw.bb + i64 1, label %sw.bb + i64 2, label %sw.bb + i64 5, label %sw.bb1 + ], !prof !2 +; CHECK: edge entry -> return probability is 7 / 85 +; CHECK: edge entry -> sw.bb probability is 14 / 85 +; CHECK: edge entry -> sw.bb1 probability is 64 / 85 + +sw.bb: + br label %return + +sw.bb1: + br label %return + +return: + %retval.0 = phi i32 [ 5, %sw.bb1 ], [ 1, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +!2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} diff --git a/test/Analysis/Profiling/load-branch-weights-ifs.ll b/test/Analysis/Profiling/load-branch-weights-ifs.ll new file mode 100644 index 0000000000..ddbaf96916 --- /dev/null +++ b/test/Analysis/Profiling/load-branch-weights-ifs.ll @@ -0,0 +1,122 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f 
%t1.prof_data
+; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \
+; RUN:     -llvmprof-output %t1.prof_data
+; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \
+; RUN:     | FileCheck %s
+; RUN: rm -f %t1.prof_data

+; FIXME: profile_rt.dll could be built on win32.
+; REQUIRES: loadable_module

+;; func_mod - Branch taken 6 times in 7.
+define i32 @func_mod(i32 %N) nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %N.addr = alloca i32, align 4
+  store i32 %N, i32* %N.addr, align 4
+  %0 = load i32* %N.addr, align 4
+  %rem = srem i32 %0, 7
+  %tobool = icmp ne i32 %rem, 0
+  br i1 %tobool, label %if.then, label %if.else
+; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0

+if.then:
+  store i32 1, i32* %retval
+  br label %return

+if.else:
+  store i32 0, i32* %retval
+  br label %return

+return:
+  %1 = load i32* %retval
+  ret i32 %1
+}

+;; func_const_true - conditional branch with a 100% taken probability.
+define i32 @func_const_true(i32 %N) nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %N.addr = alloca i32, align 4
+  store i32 %N, i32* %N.addr, align 4
+  %0 = load i32* %N.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1

+if.then:
+  store i32 1, i32* %retval
+  br label %return

+if.end:
+  store i32 0, i32* %retval
+  br label %return

+return:
+  %1 = load i32* %retval
+  ret i32 %1
+}

+;; func_const_false - conditional branch with a 100% not-taken probability.
+define i32 @func_const_false(i32 %N) nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %N.addr = alloca i32, align 4
+  store i32 %N, i32* %N.addr, align 4
+  %0 = load i32* %N.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %if.end
+; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2

+if.then:
+  store i32 1, i32* %retval
+  br label %return

+if.end:
+  store i32 0, i32* %retval
+  br label %return

+return:
+  %1 = load i32* %retval
+  ret i32 %1
+}

+define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  %loop = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %loop, align 4
+  br label %for.cond

+for.cond:
+  %0 = load i32* %loop, align 4
+  %cmp = icmp slt i32 %0, 7000
+  br i1 %cmp, label %for.body, label %for.end
+; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3

+for.body:
+  %1 = load i32* %loop, align 4
+  %call = call i32 @func_mod(i32 %1)
+  br label %for.inc

+for.inc:
+  %2 = load i32* %loop, align 4
+  %inc = add nsw i32 %2, 1
+  store i32 %inc, i32* %loop, align 4
+  br label %for.cond

+for.end:
+  %call1 = call i32 @func_const_true(i32 1)
+  %call2 = call i32 @func_const_false(i32 0)
+  ret i32 0
+}

+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 0}
+; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1}
+; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1}
+; CHECK-NOT: !4
diff --git a/test/Analysis/Profiling/load-branch-weights-loops.ll b/test/Analysis/Profiling/load-branch-weights-loops.ll
new file mode 100644
index 0000000000..476f377b47
--- /dev/null
+++ b/test/Analysis/Profiling/load-branch-weights-loops.ll
@@ -0,0 +1,188 @@
+; RUN: opt -insert-edge-profiling -o %t1 < %s
+; RUN: rm -f %t1.prof_data
+; RUN: lli -load 
%llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN: -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN: | FileCheck %s +; RUN: rm -f %t1.prof_data + +; FIXME: profile_rt.dll could be built on win32. +; REQUIRES: loadable_module + +;; func_for - Test branch probabilities for a vanilla for loop. +define i32 @func_for(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0 + +for.body: + %2 = load i32* %N.addr, align 4 + %3 = load i32* %ret, align 4 + %add = add nsw i32 %3, %2 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %4 = load i32* %loop, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %5 = load i32* %ret, align 4 + ret i32 %5 +} + +;; func_for_odd - Test branch probabilities for a for loop with a continue and +;; a break. +define i32 @func_for_odd(i32 %N) nounwind uwtable { +entry: + %N.addr = alloca i32, align 4 + %ret = alloca i32, align 4 + %loop = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32 0, i32* %ret, align 4 + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %1 = load i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: + %2 = load i32* %loop, align 4 + %rem = srem i32 %2, 10 + %tobool = icmp ne i32 %rem, 0 + br i1 %tobool, label %if.then, label %if.end +; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2 + +if.then: + br label %for.inc + +if.end: + %3 = load i32* %loop, align 4 + %cmp1 = icmp eq i32 %3, 500 + br i1 %cmp1, label %if.then2, label %if.end3 +; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3 + +if.then2: + br label %for.end + +if.end3: + %4 = load i32* %N.addr, align 4 + %5 = load i32* %ret, align 4 + %add = add nsw i32 %5, %4 + store i32 %add, i32* %ret, align 4 + br label %for.inc + +for.inc: + %6 = load i32* %loop, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + %7 = load i32* %ret, align 4 + ret i32 %7 +} + +;; func_while - Test branch probability in a vanilla while loop. 
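+;; (Editor's note, illustrative arithmetic: @main below drives each loop
+;; with N = 1000, so the guard on while.cond is evaluated 1001 times, 1000
+;; branches into the body and one exit, which is exactly the {1000, 1}
+;; weight pair encoded by !0.)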
+define i32 @func_while(i32 %N) nounwind uwtable {
+entry:
+  %N.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %loop = alloca i32, align 4
+  store i32 %N, i32* %N.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i32 0, i32* %loop, align 4
+  br label %while.cond

+while.cond:
+  %0 = load i32* %loop, align 4
+  %1 = load i32* %N.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %while.body, label %while.end
+; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0

+while.body:
+  %2 = load i32* %N.addr, align 4
+  %3 = load i32* %ret, align 4
+  %add = add nsw i32 %3, %2
+  store i32 %add, i32* %ret, align 4
+  %4 = load i32* %loop, align 4
+  %inc = add nsw i32 %4, 1
+  store i32 %inc, i32* %loop, align 4
+  br label %while.cond

+while.end:
+  %5 = load i32* %ret, align 4
+  ret i32 %5
+}

+;; func_do_while - Test branch probability in a vanilla do-while loop.
+define i32 @func_do_while(i32 %N) nounwind uwtable {
+entry:
+  %N.addr = alloca i32, align 4
+  %ret = alloca i32, align 4
+  %loop = alloca i32, align 4
+  store i32 %N, i32* %N.addr, align 4
+  store i32 0, i32* %ret, align 4
+  store i32 0, i32* %loop, align 4
+  br label %do.body

+do.body:
+  %0 = load i32* %N.addr, align 4
+  %1 = load i32* %ret, align 4
+  %add = add nsw i32 %1, %0
+  store i32 %add, i32* %ret, align 4
+  %2 = load i32* %loop, align 4
+  %inc = add nsw i32 %2, 1
+  store i32 %inc, i32* %loop, align 4
+  br label %do.cond

+do.cond:
+  %3 = load i32* %loop, align 4
+  %4 = load i32* %N.addr, align 4
+  %cmp = icmp slt i32 %3, %4
+  br i1 %cmp, label %do.body, label %do.end
+; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4

+do.end:
+  %5 = load i32* %ret, align 4
+  ret i32 %5
+}

+define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca i8**, align 8
+  store i32 0, i32* %retval
+  %call = call i32 @func_for(i32 1000)
+  %call1 = call i32 @func_for_odd(i32 1000)
+  %call2 = call i32 @func_while(i32 1000)
+  %call3 = call i32 @func_do_while(i32 1000)
+  ret i32 0
+}

+!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+!1 = metadata !{metadata !"branch_weights", i32 501, i32 0}
+!2 = metadata !{metadata !"branch_weights", i32 450, i32 51}
+!3 = metadata !{metadata !"branch_weights", i32 1, i32 50}
+!4 = metadata !{metadata !"branch_weights", i32 999, i32 1}
+; CHECK-NOT: !5
diff --git a/test/Analysis/Profiling/load-branch-weights-switches.ll b/test/Analysis/Profiling/load-branch-weights-switches.ll
new file mode 100644
index 0000000000..be11f040a7
--- /dev/null
+++ b/test/Analysis/Profiling/load-branch-weights-switches.ll
@@ -0,0 +1,165 @@
+; RUN: opt -insert-edge-profiling -o %t1 < %s
+; RUN: rm -f %t1.prof_data
+; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \
+; RUN:     -llvmprof-output %t1.prof_data
+; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \
+; RUN:     | FileCheck %s
+; RUN: rm -f %t1.prof_data

+; FIXME: profile_rt.dll could be built on win32.
+; REQUIRES: loadable_module

+;; func_switch - Test branch probabilities for a switch instruction with an
+;; even chance of taking each case (or no case).
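+;; (Editor's note, illustrative arithmetic inferred from @main at the end
+;; of this file: the driver loops 4000 times over N % 4, so cases 0, 1, 2
+;; and the default each run 1000 times, matching the uniform
+;; {1000, 1000, 1000, 1000} weights in the !0 check at the bottom.)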
+define i32 @func_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 4 + switch i32 %rem, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + ] +; CHECK: ], !prof !0 + +sw.bb: + store i32 5, i32* %retval + br label %return + +sw.bb1: + store i32 6, i32* %retval + br label %return + +sw.bb2: + store i32 7, i32* %retval + br label %return + +sw.epilog: + store i32 8, i32* %retval + br label %return + +return: + %1 = load i32* %retval + ret i32 %1 +} + +;; func_switch_switch - Test branch probabilities in a switch-instruction that +;; leads to further switch instructions. The first-tier switch occludes some +;; possibilities in the second-tier switches, leading to some branches having a +;; 0 probability. +define i32 @func_switch_switch(i32 %N) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %N.addr = alloca i32, align 4 + store i32 %N, i32* %N.addr, align 4 + %0 = load i32* %N.addr, align 4 + %rem = srem i32 %0, 2 + switch i32 %rem, label %sw.default11 [ + i32 0, label %sw.bb + i32 1, label %sw.bb5 + ] +; CHECK: ], !prof !1 + +sw.bb: + %1 = load i32* %N.addr, align 4 + %rem1 = srem i32 %1, 4 + switch i32 %rem1, label %sw.default [ + i32 0, label %sw.bb2 + i32 1, label %sw.bb3 + i32 2, label %sw.bb4 + ] +; CHECK: ], !prof !2 + +sw.bb2: + store i32 5, i32* %retval + br label %return + +sw.bb3: + store i32 6, i32* %retval + br label %return + +sw.bb4: + store i32 7, i32* %retval + br label %return + +sw.default: + store i32 8, i32* %retval + br label %return + +sw.bb5: + %2 = load i32* %N.addr, align 4 + %rem6 = srem i32 %2, 4 + switch i32 %rem6, label %sw.default10 [ + i32 0, label %sw.bb7 + i32 1, label %sw.bb8 + i32 2, label %sw.bb9 + ] +; CHECK: ], !prof !3 + +sw.bb7: + store i32 9, i32* %retval + br label %return + +sw.bb8: + store i32 10, i32* %retval + br label %return + +sw.bb9: + store i32 11, i32* %retval + br label %return + +sw.default10: + store i32 12, i32* %retval + br label %return + +sw.default11: + store i32 13, i32* %retval + br label %return + +return: + %3 = load i32* %retval + ret i32 %3 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %loop = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %loop, align 4 + br label %for.cond + +for.cond: + %0 = load i32* %loop, align 4 + %cmp = icmp slt i32 %0, 4000 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !4 + +for.body: + %1 = load i32* %loop, align 4 + %call = call i32 @func_switch(i32 %1) + %2 = load i32* %loop, align 4 + %call1 = call i32 @func_switch_switch(i32 %2) + br label %for.inc + +for.inc: + %3 = load i32* %loop, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %loop, align 4 + br label %for.cond + +for.end: + ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0} +; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1} +; CHECK-NOT: !5 diff --git a/test/Bindings/Ocaml/vmcore.ml 
b/test/Bindings/Ocaml/vmcore.ml index b8eb6d3e3d..61be4b7703 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -113,14 +113,14 @@ let test_constants () = ignore (define_global "const_int_string" c m); insist (i32_type = type_of c); - (* RUN: grep 'const_string.*"cruel\00world"' < %t.ll + (* RUN: grep 'const_string.*"cruel\\00world"' < %t.ll *) group "string"; let c = const_string context "cruel\000world" in ignore (define_global "const_string" c m); insist ((array_type i8_type 11) = type_of c); - (* RUN: grep 'const_stringz.*"hi\00again\00"' < %t.ll + (* RUN: grep 'const_stringz.*"hi\\00again\\00"' < %t.ll *) group "stringz"; let c = const_stringz context "hi\000again" in @@ -187,7 +187,7 @@ let test_constants () = ignore (define_global "const_all_ones" c m); group "pointer null"; begin - (* RUN: grep "const_pointer_null = global i64* null" < %t.ll + (* RUN: grep "const_pointer_null = global i64\* null" < %t.ll *) let c = const_pointer_null (pointer_type i64_type) in ignore (define_global "const_pointer_null" c m); @@ -542,7 +542,7 @@ let test_users () = (*===-- Aliases -----------------------------------------------------------===*) let test_aliases () = - (* RUN: grep "@alias = alias i32* @aliasee" < %t.ll + (* RUN: grep "@alias = alias i32\* @aliasee" < %t.ll *) let v = declare_global i32_type "aliasee" m in ignore (add_alias m (pointer_type i32_type) v "alias") @@ -554,7 +554,7 @@ let test_functions () = let ty = function_type i32_type [| i32_type; i64_type |] in let ty2 = function_type i8_type [| i8_type; i64_type |] in - (* RUN: grep "declare i32 @Fn1\(i32, i64\)" < %t.ll + (* RUN: grep 'declare i32 @Fn1(i32, i64)' < %t.ll *) begin group "declare"; insist (None = lookup_function "Fn1" m); @@ -935,7 +935,7 @@ let test_builder () = group "malloc/free"; begin (* RUN: grep "call.*@malloc(i32 ptrtoint" < %t.ll - * RUN: grep "call.*@free(i8*" < %t.ll + * RUN: grep "call.*@free(i8\*" < %t.ll * RUN: grep "call.*@malloc(i32 %" < %t.ll *) let bb1 = append_block context "MallocBlock1" fn in @@ -947,7 +947,7 @@ let test_builder () = end; group "indirectbr"; begin - (* RUN: grep "indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]" < %t.ll + (* RUN: grep "indirectbr i8\* blockaddress(@X7, %IBRBlock2), \[label %IBRBlock2, label %IBRBlock3\]" < %t.ll *) let bb1 = append_block context "IBRBlock1" fn in @@ -1054,10 +1054,10 @@ let test_builder () = (* RUN: grep "%build_alloca = alloca i32" < %t.ll * RUN: grep "%build_array_alloca = alloca i32, i32 %P2" < %t.ll - * RUN: grep "%build_load = load i32* %build_array_alloca" < %t.ll - * RUN: grep "store i32 %P2, i32* %build_alloca" < %t.ll - * RUN: grep "%build_gep = getelementptr i32* %build_array_alloca, i32 %P2" < %t.ll - * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_load = load i32\* %build_array_alloca" < %t.ll + * RUN: grep "store i32 %P2, i32\* %build_alloca" < %t.ll + * RUN: grep "%build_gep = getelementptr i32\* %build_array_alloca, i32 %P2" < %t.ll + * RUN: grep "%build_in_bounds_gep = getelementptr inbounds i32\* %build_array_alloca, i32 %P2" < %t.ll * RUN: grep "%build_struct_gep = getelementptr inbounds.*%build_alloca2, i32 0, i32 1" < %t.ll *) let alloca = build_alloca i32_type "build_alloca" b in @@ -1106,14 +1106,14 @@ let test_builder () = * RUN: grep "%build_fptrunc2 = fptrunc double %build_sitofp to float" < %t.ll * RUN: grep "%build_fpext = fpext float %build_fptrunc to double" < %t.ll * RUN: grep 
"%build_fpext2 = fpext float %build_fptrunc to double" < %t.ll - * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8*" < %t.ll - * RUN: grep "%build_ptrtoint = ptrtoint i8* %build_inttoptr to i64" < %t.ll - * RUN: grep "%build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_inttoptr = inttoptr i32 %P1 to i8\*" < %t.ll + * RUN: grep "%build_ptrtoint = ptrtoint i8\* %build_inttoptr to i64" < %t.ll + * RUN: grep "%build_ptrtoint2 = ptrtoint i8\* %build_inttoptr to i64" < %t.ll * RUN: grep "%build_bitcast = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast2 = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast3 = bitcast i64 %build_ptrtoint to double" < %t.ll * RUN: grep "%build_bitcast4 = bitcast i64 %build_ptrtoint to double" < %t.ll - * RUN: grep "%build_pointercast = bitcast i8* %build_inttoptr to i16*" < %t.ll + * RUN: grep "%build_pointercast = bitcast i8\* %build_inttoptr to i16*" < %t.ll *) let inst28 = build_trunc p1 i8_type "build_trunc" atentry in let inst29 = build_zext inst28 i32_type "build_zext" atentry in @@ -1148,7 +1148,7 @@ let test_builder () = * RUN: grep "%build_fcmp_false = fcmp false float %F1, %F2" < %t.ll * RUN: grep "%build_fcmp_true = fcmp true float %F2, %F1" < %t.ll * RUN: grep "%build_is_null.*= icmp eq.*%X0,.*null" < %t.ll - * RUN: grep "%build_is_not_null = icmp ne i8* %X1, null" < %t.ll + * RUN: grep "%build_is_not_null = icmp ne i8\* %X1, null" < %t.ll * RUN: grep "%build_ptrdiff" < %t.ll *) ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry); @@ -1167,7 +1167,7 @@ let test_builder () = group "miscellaneous"; begin (* RUN: grep "%build_call = tail call cc63 i32 @.*(i32 signext %P2, i32 %P1)" < %t.ll * RUN: grep "%build_select = select i1 %build_icmp, i32 %P1, i32 %P2" < %t.ll - * RUN: grep "%build_va_arg = va_arg i8** null, i32" < %t.ll + * RUN: grep "%build_va_arg = va_arg i8\*\* null, i32" < %t.ll * RUN: grep "%build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2" < %t.ll * RUN: grep "%build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2" < %t.ll * RUN: grep "%build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>" < %t.ll @@ -1240,8 +1240,8 @@ let test_builder () = end; group "dbg"; begin - (* RUN: grep "%dbg = add i32 %P1, %P2, !dbg !1" < %t.ll - * RUN: grep "!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}" < %t.ll + (* RUN: grep '%dbg = add i32 %P1, %P2, !dbg !1' < %t.ll + * RUN: grep '!1 = metadata !{i32 2, i32 3, metadata !2, metadata !2}' < %t.ll *) insist ((current_debug_location atentry) = None); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 991cc9df16..b9b223713f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -18,6 +18,7 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests" llvm-dis llvm-extract llvm-dwarfdump llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj macho-dump opt + profile_rt-shared FileCheck count not yaml2obj ) diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll new file mode 100644 index 0000000000..ec7f72d7c2 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll @@ -0,0 +1,129 @@ +; RUN: llc < %s -mcpu=cortex-a8 -march=thumb +; Test that this doesn't crash. 
+; <rdar://problem/12183003> + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" +target triple = "thumbv7-apple-ios5.1.0" + +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind + +define void @findEdges(i8*) nounwind ssp { + %2 = icmp sgt i32 undef, 0 + br i1 %2, label %5, label %3 + +; <label>:3 ; preds = %5, %1 + %4 = phi i8* [ %0, %1 ], [ %19, %5 ] + ret void + +; <label>:5 ; preds = %5, %1 + %6 = phi i8* [ %19, %5 ], [ %0, %1 ] + %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* null, i32 1) + %8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 0 + %9 = getelementptr inbounds i8* null, i32 3 + %10 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %9, i32 1) + %11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %10, 2 + %12 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %6, i32 1) + %13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 0 + %14 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 1 + %15 = getelementptr inbounds i8* %6, i32 3 + %16 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %15, i32 1) + %17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 1 + %18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 2 + %19 = getelementptr inbounds i8* %6, i32 48 + %20 = bitcast <16 x i8> %13 to <2 x i64> + %21 = bitcast <16 x i8> %8 to <2 x i64> + %22 = bitcast <16 x i8> %14 to <2 x i64> + %23 = shufflevector <2 x i64> %22, <2 x i64> undef, <1 x i32> zeroinitializer + %24 = bitcast <1 x i64> %23 to <8 x i8> + %25 = zext <8 x i8> %24 to <8 x i16> + %26 = sub <8 x i16> zeroinitializer, %25 + %27 = bitcast <16 x i8> %17 to <2 x i64> + %28 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %26) nounwind + %29 = mul <8 x i16> %28, %28 + %30 = add <8 x i16> zeroinitializer, %29 + %31 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> undef, <8 x i16> %30) nounwind + %32 = bitcast <16 x i8> %11 to <2 x i64> + %33 = shufflevector <2 x i64> %32, <2 x i64> undef, <1 x i32> zeroinitializer + %34 = bitcast <1 x i64> %33 to <8 x i8> + %35 = zext <8 x i8> %34 to <8 x i16> + %36 = sub <8 x i16> %35, zeroinitializer + %37 = bitcast <16 x i8> %18 to <2 x i64> + %38 = shufflevector <2 x i64> %37, <2 x i64> undef, <1 x i32> zeroinitializer + %39 = bitcast <1 x i64> %38 to <8 x i8> + %40 = zext <8 x i8> %39 to <8 x i16> + %41 = sub <8 x i16> zeroinitializer, %40 + %42 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %36) nounwind + %43 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %41) nounwind + %44 = mul <8 x i16> %42, %42 + %45 = mul <8 x i16> %43, %43 + %46 = add <8 x i16> %45, %44 + %47 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %31, <8 x i16> %46) nounwind + %48 = bitcast <8 x i16> %47 to <2 x i64> + %49 = shufflevector <2 x i64> %48, <2 x i64> undef, <1 x i32> zeroinitializer + %50 = bitcast <1 x i64> %49 to <4 x i16> + %51 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %50, <4 x i16> undef) nounwind + %52 = tail call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %51, <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>) + %53 = bitcast <4 x i16> %52 to <1 x i64> + %54 = shufflevector <1 x i64> %53, <1 x i64> undef, <2 x i32> <i32 0, i32 1> + %55 = bitcast <2 x i64> %54 to <8 x i16> + 
%56 = tail call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %55, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %57 = shufflevector <2 x i64> %20, <2 x i64> undef, <1 x i32> <i32 1> + %58 = bitcast <1 x i64> %57 to <8 x i8> + %59 = zext <8 x i8> %58 to <8 x i16> + %60 = sub <8 x i16> zeroinitializer, %59 + %61 = shufflevector <2 x i64> %21, <2 x i64> undef, <1 x i32> <i32 1> + %62 = bitcast <1 x i64> %61 to <8 x i8> + %63 = zext <8 x i8> %62 to <8 x i16> + %64 = sub <8 x i16> %63, zeroinitializer + %65 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %60) nounwind + %66 = mul <8 x i16> %65, %65 + %67 = add <8 x i16> zeroinitializer, %66 + %68 = shufflevector <2 x i64> %27, <2 x i64> undef, <1 x i32> <i32 1> + %69 = bitcast <1 x i64> %68 to <8 x i8> + %70 = zext <8 x i8> %69 to <8 x i16> + %71 = sub <8 x i16> zeroinitializer, %70 + %72 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> undef) nounwind + %73 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %71) nounwind + %74 = mul <8 x i16> %72, %72 + %75 = mul <8 x i16> %73, %73 + %76 = add <8 x i16> %75, %74 + %77 = shufflevector <2 x i64> %32, <2 x i64> undef, <1 x i32> <i32 1> + %78 = bitcast <1 x i64> %77 to <8 x i8> + %79 = zext <8 x i8> %78 to <8 x i16> + %80 = sub <8 x i16> %79, zeroinitializer + %81 = tail call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %80) nounwind + %82 = mul <8 x i16> %81, %81 + %83 = add <8 x i16> zeroinitializer, %82 + %84 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %76, <8 x i16> %83) nounwind + %85 = tail call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %67, <8 x i16> %84) nounwind + %86 = bitcast <8 x i16> %85 to <2 x i64> + %87 = shufflevector <2 x i64> %86, <2 x i64> undef, <1 x i32> <i32 1> + %88 = bitcast <1 x i64> %87 to <4 x i16> + %89 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %88, <4 x i16> undef) nounwind + %90 = tail call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %89, <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>) + %91 = bitcast <4 x i16> %90 to <1 x i64> + %92 = shufflevector <1 x i64> undef, <1 x i64> %91, <2 x i32> <i32 0, i32 1> + %93 = bitcast <2 x i64> %92 to <8 x i16> + %94 = tail call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %93, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) + %95 = bitcast <8 x i8> %56 to <1 x i64> + %96 = bitcast <8 x i8> %94 to <1 x i64> + %97 = shufflevector <1 x i64> %95, <1 x i64> %96, <2 x i32> <i32 0, i32 1> + %98 = bitcast <2 x i64> %97 to <16 x i8> + tail call void @llvm.arm.neon.vst1.v16i8(i8* null, <16 x i8> %98, i32 1) + %99 = icmp slt i32 undef, undef + br i1 %99, label %5, label %3 +} + +declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone + +declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/ARM/2012-08-30-select.ll b/test/CodeGen/ARM/2012-08-30-select.ll new file mode 100644 index 0000000000..8471be5330 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-30-select.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s +; rdar://12201387 + +;CHECK: select_s_v_v 
+;CHECK: it ne +;CHECK-NEXT: vmovne.i32 +;CHECK: bx +define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) { +entry: + %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1) + %and = and i32 %avail, 1 + %tobool = icmp eq i32 %and, 0 + %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer + ret <16 x i8> %vld1. +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 ) + diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 8967730835..6e6b36377f 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -159,3 +159,13 @@ entry: store i8 %3, i8* %old ret void } + +; CHECK: func4 +; This function should not need to use callee-saved registers. +; rdar://problem/12203728 +; CHECK-NOT: r4 +define i32 @func4(i32* %p) nounwind optsize ssp { +entry: + %0 = atomicrmw add i32* %p, i32 1 monotonic + ret i32 %0 +} diff --git a/test/CodeGen/ARM/crash-shufflevector.ll b/test/CodeGen/ARM/crash-shufflevector.ll deleted file mode 100644 index ece4234699..0000000000 --- a/test/CodeGen/ARM/crash-shufflevector.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: llc < %s -mtriple=armv7-- - -declare void @g(<16 x i8>) -define void @f(<4 x i8> %param1, <4 x i8> %param2) { - %y1 = shufflevector <4 x i8> %param1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %y2 = shufflevector <4 x i8> %param2, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %z = shufflevector <16 x i8> %y1, <16 x i8> %y2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> - call void @g(<16 x i8> %z) - ret void -} diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll new file mode 100644 index 0000000000..e19185b0eb --- /dev/null +++ b/test/CodeGen/ARM/domain-conv-vmovs.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s + +define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane0to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d1, d0, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to0: + %vec = insertelement <2 x float> %in, float %arg, i32 0 + %res = fadd <2 x float> %vec, %vec + +; CHECK: vext.32 d1, d1, d1, #1 +; CHECK: vext.32 d1, d0, d1, #1 +; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1 + + ret <2 x float> %res +} + +define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) { +; CHECK: test_vmovs_via_vext_lane1to1: + %vec = insertelement <2 x float> %in, float %arg, i32 1 + %res = fadd <2 x float> 
%vec, %vec

+; CHECK: vext.32 d1, d0, d1, #1
+; CHECK: vext.32 d1, d1, d1, #1
+; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

+  ret <2 x float> %res
+}


+define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
+; CHECK: test_vmovs_via_vdup:

+  ; Do an operation (which will end up NEON because of +neonfp) to convince the
+  ; execution-domain pass that NEON is a good thing to use.
+  %res = fadd float %ret, %ret
+  ; It makes sense for LLVM to do the addition in d0 here, because it's going
+  ; to be returned. This means it will want a "vmov s0, s1":
+; CHECK: vdup.32 d0, d0[1]

+  ret float %res
+}

+declare float @llvm.sqrt.f32(float)

+declare void @bar()

+; This is a compile-test.
+define float @test_ineligible(float, float %in) {
+; CHECK: test_ineligible:

+  %sqrt = call float @llvm.sqrt.f32(float %in)
+  %val = fadd float %sqrt, %sqrt

+  ; This call forces a move from a callee-saved register to the return-reg. That
+  ; move is not eligible for conversion to a d-register instruction because the
+  ; use-def chains would be messed up. Primarily a compile-test (we used to
+  ; hit an internal fault).
+  call void @bar()
+; CHECK: bl bar
+; CHECK: vmov.f32 {{s[0-9]+}}, {{s[0-9]+}}
+  ret float %val
+}
\ No newline at end of file diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll new file mode 100644 index 0000000000..392a845d2c --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 + +@g = global i32 0, align 4 + +define i32 @LoadGV() { +entry: +; THUMB: LoadGV +; THUMB: movw [[reg0:r[0-9]+]], +; THUMB: movt [[reg0]], +; THUMB: add [[reg0]], pc +; ARM: LoadGV +; ARM: ldr [[reg1:r[0-9]+]], +; ARM: add [[reg1]], pc, [[reg1]] +; ARMv7: LoadGV +; ARMv7: movw [[reg2:r[0-9]+]], +; ARMv7: movt [[reg2]], +; ARMv7: add [[reg2]], pc, [[reg2]] + %tmp = load i32* @g + ret i32 %tmp +} + +@i = external global i32 + +define i32 @LoadIndirectSymbol() { +entry: +; THUMB: LoadIndirectSymbol +; THUMB: movw r[[reg3:[0-9]+]], +; THUMB: movt r[[reg3]], +; THUMB: add r[[reg3]], pc +; THUMB: ldr r[[reg3]], [r[[reg3]]] +; ARM: LoadIndirectSymbol +; ARM: ldr [[reg4:r[0-9]+]], +; ARM: ldr [[reg4]], [pc, [[reg4]]] +; ARMv7: LoadIndirectSymbol +; ARMv7: movw r[[reg5:[0-9]+]], +; ARMv7: movt r[[reg5]], +; ARMv7: add r[[reg5]], pc, r[[reg5]] +; ARMv7: ldr r[[reg5]], [r[[reg5]]] + %tmp = load i32* @i + ret i32 %tmp +} diff --git a/test/CodeGen/ARM/fp-fast.ll b/test/CodeGen/ARM/fp-fast.ll new file mode 100644 index 0000000000..ec57187381 --- /dev/null +++ b/test/CodeGen/ARM/fp-fast.ll @@ -0,0 +1,60 @@ +; RUN: llc -march=arm -mcpu=cortex-a9 -mattr=+vfp4 -enable-unsafe-fp-math < %s | FileCheck %s + +; CHECK: test1 +define float @test1(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t1 = fmul float %x, 3.0 + %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %t1) + ret float %t2 +} + +; CHECK: test2 +define float @test2(float %x, float %y) { +; CHECK-NOT: vmul +; CHECK: vfma.f32 +; CHECK-NOT: vmul + %t1 = fmul float %x, 3.0 + %t2 = call float @llvm.fma.f32(float %t1, float 2.0, float %y) + ret float %t2 +} + +; CHECK: test3 +define float @test3(float %x, float %y) { +; CHECK-NOT: vfma +; CHECK: vadd.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float 1.0, float %y) + ret float %t2 +} + +; CHECK: test4 +define float @test4(float %x, float %y) { +; CHECK-NOT: vfma +; CHECK: vsub.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float -1.0, float %y) + ret float %t2 +} + +; CHECK: test5 +define float @test5(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t2 = call float @llvm.fma.f32(float %x, float 2.0, float %x) + ret float %t2 +} + +; CHECK: test6 +define float @test6(float %x) { +; CHECK-NOT: vfma +; CHECK: vmul.f32 +; CHECK-NOT: vfma + %t1 = fsub float -0.0, %x + %t2 = call float @llvm.fma.f32(float %x, float 5.0, float %t1) + ret float %t2 +} + +declare float @llvm.fma.f32(float, float, float) diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll new file mode 100644 index 0000000000..4f2d7e3f73 --- /dev/null +++ b/test/CodeGen/ARM/integer_insertelement.ll @@ -0,0 +1,35 @@ +; RUN: llc %s -o - -march=arm -mattr=+neon | FileCheck %s + +; This test checks that when inserting one (integer) element into a vector, +; the vector is not spuriously copied. 
"vorr dX, dY, dY" is the way of moving +; one DPR to another that we check for. + +; CHECK: @f +; CHECK-NOT: vorr d +; CHECK: vmov s +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <4 x i32> @f(<4 x i32> %in) { + %1 = insertelement <4 x i32> %in, i32 255, i32 3 + ret <4 x i32> %1 +} + +; CHECK: @g +; CHECK-NOT: vorr d +; CHECK: vmov.16 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <8 x i16> @g(<8 x i16> %in) { + %1 = insertelement <8 x i16> %in, i16 255, i32 7 + ret <8 x i16> %1 +} + +; CHECK: @h +; CHECK-NOT: vorr d +; CHECK: vmov.8 d +; CHECK-NOT: vorr d +; CHECK: mov pc, lr +define <16 x i8> @h(<16 x i8> %in) { + %1 = insertelement <16 x i8> %in, i8 255, i32 15 + ret <16 x i8> %1 +} diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll new file mode 100644 index 0000000000..e4a00e9ac3 --- /dev/null +++ b/test/CodeGen/ARM/longMAC.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -march=arm | FileCheck %s +; Check generated signed and unsigned multiply accumulate long. + +define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest1: +;CHECK: umlal + %conv = zext i32 %a to i64 + %conv1 = zext i32 %b to i64 + %mul = mul i64 %conv1, %conv + %add = add i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) { +;CHECK: MACLongTest2: +;CHECK: smlal + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, %c + ret i64 %add +} + +define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest3: +;CHECK: umlal + %conv = zext i32 %b to i64 + %conv1 = zext i32 %a to i64 + %mul = mul i64 %conv, %conv1 + %conv2 = zext i32 %c to i64 + %add = add i64 %mul, %conv2 + ret i64 %add +} + +define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) { +;CHECK: MACLongTest4: +;CHECK: smlal + %conv = sext i32 %b to i64 + %conv1 = sext i32 %a to i64 + %mul = mul nsw i64 %conv, %conv1 + %conv2 = sext i32 %c to i64 + %add = add nsw i64 %mul, %conv2 + ret i64 %add +} diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 5575566628..62708ed53d 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -80,7 +80,7 @@ define double @f7(double %a, double %b) { ; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 ; CHECK-NEON-NEXT: cmp r0, [[R3]] ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]] +; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4 ; CHECK-NEON-NEXT: ldr ; CHECK-NEON: bx diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index cfc0e70120..7507808912 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -9,7 +9,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; T2: t1: ; T2: mvn r0, #-2147483648 -; T2: addle.w r1, r1 +; T2: addle r1, r0 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 @@ -23,7 +23,7 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM: mov r0, r1 ; T2: t2: -; T2: suble.w r1, r1, #10 +; T2: suble r1, #10 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 @@ -37,7 +37,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t3: -; T2: andge.w r3, r3, r2 +; T2: andge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 -1, i32 %x @@ -51,7 +51,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t4: -; T2: orrge.w r3, r3, r2 +; T2: orrge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 
%cond, i32 0, i32 %x @@ -81,7 +81,7 @@ define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; T2: t6: ; T2-NOT: movge -; T2: eorlt.w r3, r3, r2 +; T2: eorlt r3, r2 %cond = icmp slt i32 %a, %b %tmp1 = select i1 %cond, i32 %c, i32 0 %tmp2 = xor i32 %tmp1, %d @@ -200,7 +200,7 @@ entry: ; T2: t13 ; T2: cmp r1, #10 -; T2: addgt.w r0, r0, #1 +; T2: addgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = zext i1 %cmp to i32 %add = add i32 %conv, %c @@ -216,7 +216,7 @@ entry: ; T2: t14 ; T2: cmp r1, #10 -; T2: subgt.w r0, r0, #1 +; T2: subgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = sext i1 %cmp to i32 %add = add i32 %conv, %c diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll index 05332e4d8c..a8c224b438 100644 --- a/test/CodeGen/ARM/vdup.ll +++ b/test/CodeGen/ARM/vdup.ll @@ -261,3 +261,37 @@ define void @redundantVdup(<8 x i8>* %ptr) nounwind { store <8 x i8> %2, <8 x i8>* %ptr, align 8 ret void } + +define <4 x i32> @tdupi(i32 %x, i32 %y) { +;CHECK: tdupi +;CHECK: vdup.32 + %1 = insertelement <4 x i32> undef, i32 %x, i32 0 + %2 = insertelement <4 x i32> %1, i32 %x, i32 1 + %3 = insertelement <4 x i32> %2, i32 %x, i32 2 + %4 = insertelement <4 x i32> %3, i32 %y, i32 3 + ret <4 x i32> %4 +} + +define <4 x float> @tdupf(float %x, float %y) { +;CHECK: tdupf +;CHECK: vdup.32 + %1 = insertelement <4 x float> undef, float %x, i32 0 + %2 = insertelement <4 x float> %1, float %x, i32 1 + %3 = insertelement <4 x float> %2, float %x, i32 2 + %4 = insertelement <4 x float> %3, float %y, i32 3 + ret <4 x float> %4 +} + +; This test checks that when splatting an element from a vector into another, +; the value isn't moved out to GPRs first. +define <4 x i32> @tduplane(<4 x i32> %invec) { +;CHECK: tduplane +;CHECK-NOT: vmov {{.*}}, d16[1] +;CHECK: vdup.32 {{.*}}, d16[1] + %in = extractelement <4 x i32> %invec, i32 1 + %1 = insertelement <4 x i32> undef, i32 %in, i32 0 + %2 = insertelement <4 x i32> %1, i32 %in, i32 1 + %3 = insertelement <4 x i32> %2, i32 %in, i32 2 + %4 = insertelement <4 x i32> %3, i32 255, i32 3 + ret <4 x i32> %4 +} diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll index 8fd3db2919..22af797621 100644 --- a/test/CodeGen/ARM/vector-extend-narrow.ll +++ b/test/CodeGen/ARM/vector-extend-narrow.ll @@ -62,3 +62,14 @@ define <4 x i8> @i(<4 x i8>* %x) { %2 = sdiv <4 x i8> zeroinitializer, %1 ret <4 x i8> %2 } +; CHECK: j: +define <4 x i32> @j(<4 x i8>* %in) nounwind { + ; CHECK: vld1 + ; CHECK: vmovl.u8 + ; CHECK: vmovl.u16 + ; CHECK-NOT: vand + %1 = load <4 x i8>* %in, align 4 + %2 = zext <4 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +} + diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 1fc885d613..2ed65c9aee 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { ;CHECK: vsetQ_lane32: -;CHECK: vmov.32 +;CHECK: vmov s %tmp1 = load <4 x i32>* %A %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 ret <4 x i32> %tmp2 diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll new file mode 100644 index 0000000000..802ee2cb05 --- /dev/null +++ b/test/CodeGen/Generic/MachineBranchProb.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -print-machineinstrs=expand-isel-pseudos -o /dev/null 2>&1 | FileCheck %s + +; Make sure we have the correct weight attached to each successor. 
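+; (Editor's sketch, derived from !0 below: the default edge has weight 7,
+; the three cases reaching sw.bb sum to 6 + 4 + 4 = 14, and case 5 carries
+; 64; the CHECK lines then follow those sums through the binary-tree
+; expansion of the switch into BB#4 and BB#5.)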
+define i32 @test2(i32 %x) nounwind uwtable readnone ssp {
+; CHECK: Machine code for function test2:
+entry:
+  %conv = sext i32 %x to i64
+  switch i64 %conv, label %return [
+    i64 0, label %sw.bb
+    i64 1, label %sw.bb
+    i64 4, label %sw.bb
+    i64 5, label %sw.bb1
+  ], !prof !0
+; CHECK: BB#0: derived from LLVM BB %entry
+; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
+; CHECK: BB#4: derived from LLVM BB %entry
+; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
+; CHECK: BB#5: derived from LLVM BB %entry
+; CHECK: Successors according to CFG: BB#1(4) BB#3(7)

+sw.bb:
+  br label %return

+sw.bb1:
+  br label %return

+return:
+  %retval.0 = phi i32 [ 5, %sw.bb1 ], [ 1, %sw.bb ], [ 0, %entry ]
+  ret i32 %retval.0
+}

+!0 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index e9ac8b6749..8a6efb620e 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = #7
 ; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: r5 = #6
 ; CHECK: r0 = #1
 ; CHECK: r1 = #2
 ; CHECK: r2 = #3
 ; CHECK: r3 = #4
 ; CHECK: r4 = #5
-; CHECK: r5 = #6


 define void @foo() nounwind {
diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll
index ab69b22df5..186e393788 100644
--- a/test/CodeGen/Hexagon/newvaluestore.ll
+++ b/test/CodeGen/Hexagon/newvaluestore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
 ; Check that we generate a new-value store packet in V4

 @i = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/remove_lsr.ll b/test/CodeGen/Hexagon/remove_lsr.ll
new file mode 100644
index 0000000000..79b5f4ae7c
--- /dev/null
+++ b/test/CodeGen/Hexagon/remove_lsr.ll
@@ -0,0 +1,80 @@
+; Test fix for PR-13709.
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: foo
+; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)
+; CHECK-NOT: lsr(r{{[0-9]+}}:{{[0-9]+}}, #32)

+; Convert the sequence
+; r17:16 = lsr(r11:10, #32)
+; .. = r16
+; into
+; r17:16 = lsr(r11:10, #32)
+; .. = r11
+; This makes the lsr instruction dead and it gets removed subsequently
+; by a dead code removal pass.
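+; (Editor's note, an assumption about the Hexagon assembly sketched above:
+; r11:10 names a 64-bit register pair with r11 as the high word, so the
+; #32 right shift merely exposes r11, and rewriting the use to read r11
+; directly leaves the lsr with no remaining users.)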
+
+%union.vect64 = type { i64 }
+%union.vect32 = type { i32 }
+
+define void @foo(%union.vect64* nocapture %sss_extracted_bit_rx_data_ptr,
+                 %union.vect32* nocapture %s_even, %union.vect32* nocapture %s_odd,
+                 i8* nocapture %scr_s_even_code_ptr, i8* nocapture %scr_s_odd_code_ptr)
+                 nounwind {
+entry:
+  %scevgep = getelementptr %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1
+  %scevgep28 = getelementptr %union.vect32* %s_odd, i32 1
+  %scevgep32 = getelementptr %union.vect32* %s_even, i32 1
+  %scevgep36 = getelementptr i8* %scr_s_odd_code_ptr, i32 1
+  %scevgep39 = getelementptr i8* %scr_s_even_code_ptr, i32 1
+  br label %for.body
+
+for.body: ; preds = %for.body, %entry
+  %lsr.iv42 = phi i32 [ %lsr.iv.next, %for.body ], [ 2, %entry ]
+  %lsr.iv40 = phi i8* [ %scevgep41, %for.body ], [ %scevgep39, %entry ]
+  %lsr.iv37 = phi i8* [ %scevgep38, %for.body ], [ %scevgep36, %entry ]
+  %lsr.iv33 = phi %union.vect32* [ %scevgep34, %for.body ], [ %scevgep32, %entry ]
+  %lsr.iv29 = phi %union.vect32* [ %scevgep30, %for.body ], [ %scevgep28, %entry ]
+  %lsr.iv = phi %union.vect64* [ %scevgep26, %for.body ], [ %scevgep, %entry ]
+  %predicate_1.023 = phi i8 [ undef, %entry ], [ %10, %for.body ]
+  %predicate.022 = phi i8 [ undef, %entry ], [ %9, %for.body ]
+  %val.021 = phi i64 [ undef, %entry ], [ %srcval, %for.body ]
+  %lsr.iv3335 = bitcast %union.vect32* %lsr.iv33 to i32*
+  %lsr.iv2931 = bitcast %union.vect32* %lsr.iv29 to i32*
+  %lsr.iv27 = bitcast %union.vect64* %lsr.iv to i64*
+  %0 = tail call i64 @llvm.hexagon.A2.vsubhs(i64 0, i64 %val.021)
+  %conv3 = sext i8 %predicate.022 to i32
+  %1 = trunc i64 %val.021 to i32
+  %2 = trunc i64 %0 to i32
+  %3 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv3, i32 %1, i32 %2)
+  store i32 %3, i32* %lsr.iv3335, align 4, !tbaa !0
+  %conv8 = sext i8 %predicate_1.023 to i32
+  %4 = lshr i64 %val.021, 32
+  %5 = trunc i64 %4 to i32
+  %6 = lshr i64 %0, 32
+  %7 = trunc i64 %6 to i32
+  %8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
+  store i32 %8, i32* %lsr.iv2931, align 4, !tbaa !0
+  %srcval = load i64* %lsr.iv27, align 8
+  %9 = load i8* %lsr.iv40, align 1, !tbaa !1
+  %10 = load i8* %lsr.iv37, align 1, !tbaa !1
+  %lftr.wideiv = trunc i32 %lsr.iv42 to i8
+  %exitcond = icmp eq i8 %lftr.wideiv, 32
+  %scevgep26 = getelementptr %union.vect64* %lsr.iv, i32 1
+  %scevgep30 = getelementptr %union.vect32* %lsr.iv29, i32 1
+  %scevgep34 = getelementptr %union.vect32* %lsr.iv33, i32 1
+  %scevgep38 = getelementptr i8* %lsr.iv37, i32 1
+  %scevgep41 = getelementptr i8* %lsr.iv40, i32 1
+  %lsr.iv.next = add i32 %lsr.iv42, 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+  ret void
+}
+
+declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) nounwind readnone
+
+declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) nounwind readnone
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index 2e4ab633e4..683a4c21bc 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
 
 @num = external global i32
 @acc = external global i32
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index b266ce61a8..1b2fbc8932 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -1,12 +1,18 @@
-; RUN: llc -march=mipsel -enable-mips-delay-filler < %s | FileCheck %s
+; RUN: llc -march=mipsel -O0 < %s | FileCheck %s -check-prefix=None
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=Default
 
 define void @foo1() nounwind {
 entry:
-; CHECK: jalr
-; CHECK-NOT: nop
-; CHECK: jr
-; CHECK-NOT: nop
-; CHECK: .end
+; Default: jalr
+; Default-NOT: nop
+; Default: jr
+; Default-NOT: nop
+; Default: .end
+; None: jalr
+; None: nop
+; None: jr
+; None: nop
+; None: .end
   tail call void @foo2(i32 3) nounwind
   ret void
diff --git a/test/CodeGen/Mips/init-array.ll b/test/CodeGen/Mips/init-array.ll
new file mode 100644
index 0000000000..f96ce26472
--- /dev/null
+++ b/test/CodeGen/Mips/init-array.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple mipsel-unknown-linux -use-init-array < %s | FileCheck %s
+
+target triple = "mipsel-unknown-linux"
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @test }]
+; CHECK: .section
+; CHECK: .init_array
+; CHECK-NOT: .ctors
+; CHECK: .4byte test
+
+define internal void @test() section ".text.startup" {
+entry:
+  ret void
+}
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index 0227b88fbc..873b9f1410 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -6,9 +6,15 @@ define void @foo1(i32 %s) nounwind {
 entry:
 ; O32: bal
+; O32: lui $at, 0
+; O32: addiu $at, $at, {{[0-9]+}}
+; N64: lui $at, 0
+; N64: daddiu $at, $at, 0
+; N64: dsll $at, $at, 16
+; N64: daddiu $at, $at, 0
 ; N64: bal
-; N64: highest
-; N64: higher
+; N64: dsll $at, $at, 16
+; N64: daddiu $at, $at, {{[0-9]+}}
   %tobool = icmp eq i32 %s, 0
   br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/Mips/small-section-reserve-gp.ll b/test/CodeGen/Mips/small-section-reserve-gp.ll
new file mode 100644
index 0000000000..03503fb2ae
--- /dev/null
+++ b/test/CodeGen/Mips/small-section-reserve-gp.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=mipsel-sde-elf -march=mipsel -relocation-model=static < %s \
+; RUN:   | FileCheck %s
+
+@i = internal unnamed_addr global i32 0, align 4
+
+define i32 @geti() nounwind readonly {
+entry:
+; CHECK: lw ${{[0-9]+}}, %gp_rel(i)($gp)
+  %0 = load i32* @i, align 4
+  ret i32 %0
+}
+
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index d681091f4c..ce98cc8262 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s
 
 @foo = thread_local global i32 42
 @bar = hidden alias i32* @foo
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index a7ddb96e43..72d30dc369 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,8 +1,10 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
-; RUN: llc -march=mipsel -relocation-model=static < %s \
-; RUN:   | FileCheck %s -check-prefix=STATIC
-; RUN: llc -march=mipsel -relocation-model=static < %s \
-; RUN:   -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
+; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \
+; RUN:   FileCheck %s -check-prefix=PIC
+; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler < \
+; RUN:   %s | FileCheck %s -check-prefix=STATIC
+; RUN: llc -march=mipsel -relocation-model=static -disable-mips-delay-filler \
+; RUN:   -mips-fix-global-base-reg=false < %s | \
+; RUN:   FileCheck %s -check-prefix=STATICGP
 
 @t1 = thread_local global i32 0, align 4
diff --git a/test/CodeGen/Mips/uitofp.ll b/test/CodeGen/Mips/uitofp.ll
new file mode 100644
index 0000000000..aff70c24f0
--- /dev/null
+++ b/test/CodeGen/Mips/uitofp.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips -mattr=+single-float < %s
+
+define void @f0() nounwind {
+entry:
+  %b = alloca i32, align 4
+  %a = alloca float, align 4
+  store volatile i32 1, i32* %b, align 4
+  %0 = load volatile i32* %b, align 4
+  %conv = uitofp i32 %0 to float
+  store float %conv, float* %a, align 4
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index 0003a17c22..b95ac68807 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -9,9 +9,8 @@ target triple = "powerpc-apple-darwin11.0"
 define void @foo() nounwind ssp {
 entry:
-; Better: mtctr r12
-; CHECK: mr r12, [[REG:r[0-9]+]]
-; CHECK: mtctr [[REG]]
+; CHECK: mtctr r12
+; CHECK: bctrl
   %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1]
   call void (...)* %0() nounwind
   br label %return
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 9a456b6ecc..638059a38e 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -2,10 +2,10 @@
 
 declare void @bar(i64 %x, i64 %y)
 
-; CHECK: li {{[53]}}, 0
+; CHECK: li 3, 0
 ; CHECK: li 4, 2
+; CHECK: li 5, 0
 ; CHECK: li 6, 3
-; CHECK: mr {{[53]}}, {{[53]}}
 
 define void @foo() {
   call void @bar(i64 2, i64 3)
diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
new file mode 100644
index 0000000000..afa1ea8e75
--- /dev/null
+++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux"
+
+@.str = private unnamed_addr constant [3 x i8] c"%i\00", align 1
+
+define void @test(i32 %count) nounwind {
+entry:
+; CHECK: crxor 6, 6, 6
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+  %cmp2 = icmp sgt i32 %count, 0
+  br i1 %cmp2, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+  %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+; CHECK: crxor 6, 6, 6
+  %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+  %inc = add nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, %count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/PowerPC/fsl-e500mc.ll b/test/CodeGen/PowerPC/fsl-e500mc.ll
new file mode 100644
index 0000000000..09b7e41b18
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsl-e500mc.ll
@@ -0,0 +1,22 @@
+;
+; Test support for Freescale e500mc and its higher memcpy inlining thresholds.
+;
+; RUN: llc -mcpu=e500mc < %s 2>&1 | FileCheck %s
+; CHECK-NOT: not a recognized processor for this target
+
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-fsl-linux"
+
+%struct.teststruct = type { [12 x i32], i32 }
+
+define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
+entry:
+; CHECK: @copy
+; CHECK-NOT: bl memcpy
+  %0 = bitcast %struct.teststruct* %agg.result to i8*
+  %1 = bitcast %struct.teststruct* %in to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 52, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/PowerPC/fsl-e5500.ll b/test/CodeGen/PowerPC/fsl-e5500.ll
new file mode 100644
index 0000000000..d47d8c8ed4
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsl-e5500.ll
@@ -0,0 +1,22 @@
+;
+; Test support for Freescale e5500 and its higher memcpy inlining thresholds.
+;
+; RUN: llc -mcpu=e5500 < %s 2>&1 | FileCheck %s
+; CHECK-NOT: not a recognized processor for this target
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-fsl-linux"
+
+%struct.teststruct = type { [24 x i32], i32 }
+
+define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
+entry:
+; CHECK: @copy
+; CHECK-NOT: bl memcpy
+  %0 = bitcast %struct.teststruct* %agg.result to i8*
+  %1 = bitcast %struct.teststruct* %in to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 100, i32 4, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/PowerPC/inlineasm-copy.ll b/test/CodeGen/PowerPC/inlineasm-copy.ll
index e1ff82d5f9..59c3388835 100644
--- a/test/CodeGen/PowerPC/inlineasm-copy.ll
+++ b/test/CodeGen/PowerPC/inlineasm-copy.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=ppc32 | not grep mr
+; RUN: llc < %s -march=ppc32 -verify-machineinstrs | FileCheck %s
+; CHECK-NOT: mr
 
 define i32 @test(i32 %Y, i32 %X) {
 entry:
   %tmp = tail call i32 asm "foo $0", "=r"( ) ; <i32> [#uses=1]
@@ -12,3 +13,9 @@ entry:
   ret i32 %tmp1
 }
 
+; CHECK: test3
+define i32 @test3(i32 %Y, i32 %X) {
+entry:
+  %tmp1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "foo $0, $1", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"( i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y, i32 %X, i32 %Y ) ; <i32> [#uses=1]
+  ret i32 1
+}
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
new file mode 100644
index 0000000000..f1326ba992
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@double_array = global [32 x double] zeroinitializer, align 8
+@number64 = global i64 10, align 8
+@internal_static_var.x = internal unnamed_addr global i64 0, align 8
+
+define i64 @access_int64(i64 %a) nounwind readonly {
+entry:
+; CHECK: access_int64:
+; CHECK-NEXT: .align 3
+; CHECK-NEXT: .quad .L.access_int64
+; CHECK-NEXT: .quad .TOC.@tocbase
+; CHECK-NEXT: .text
+ %0 = load i64* @number64, align 8
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
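+; A rough sketch of the pattern checked above, assuming the 64-bit ELF ABI
+; (r2 holds the TOC pointer; the two .quad entries form the function
+; descriptor: entry address plus TOC base). The .LC number is illustrative:
+;   ld 3, .LC0@toc(2)   ; load the address of @number64 from its TOC slot
+;   ld 3, 0(3)          ; then load the value itself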
+
+define i64 @internal_static_var(i64 %a) nounwind {
+entry:
+; CHECK: internal_static_var:
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %0 = load i64* @internal_static_var.x, align 8
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
+
+define i32 @access_double(double %a) nounwind readnone {
+entry:
+; CHECK: access_double:
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = fcmp oeq double %a, 2.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+
+define i32 @access_double_array(double %a, i32 %i) nounwind readonly {
+entry:
+; CHECK: access_double_array:
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom
+ %0 = load double* %arrayidx, align 8
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
+ %cmp = fcmp oeq double %0, %a
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+; Check the creation of 4 .tc entries:
+; * int64_t global 'number64'
+; * double constant 2.0
+; * double array 'double_array'
+; * static int64_t 'x' accessed within '@internal_static_var'
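+; For reference, each such entry expands to something of the shape (names
+; illustrative):
+;   .LC0:
+;           .tc number64[TC],number64
+; i.e. one 8-byte TOC slot holding the address of the corresponding symbol.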
+; CHECK: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
+; CHECK-NEXT: .LC{{[0-9]+}}:
+; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
diff --git a/test/CodeGen/PowerPC/ppc64-zext.ll b/test/CodeGen/PowerPC/ppc64-zext.ll
new file mode 100644
index 0000000000..eb55445cc6
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-zext.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux"
+
+define i64 @fun(i32 %arg32) nounwind {
+entry:
+; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+  %o = zext i32 %arg32 to i64
+  ret i64 %o
+}
+
diff --git a/test/CodeGen/PowerPC/pr13641.ll b/test/CodeGen/PowerPC/pr13641.ll
new file mode 100644
index 0000000000..c4d3f3a9dc
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr13641.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @foo() nounwind {
+  ret void
+}
+
+; CHECK: blr
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .quad 0
diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll
new file mode 100644
index 0000000000..520921f57a
--- /dev/null
+++ b/test/CodeGen/PowerPC/remat-imm.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'test.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-linux"
+
+@.str = private unnamed_addr constant [6 x i8] c"%d,%d\00", align 1
+
+define i32 @main() nounwind {
+entry:
+; CHECK: li 4, 128
+; CHECK-NOT: mr 4, {{.*}}
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind
+  ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb/thumb_jump24_fixup.ll b/test/CodeGen/Thumb/thumb_jump24_fixup.ll
new file mode 100644
index 0000000000..e6a6b25ca1
--- /dev/null
+++ b/test/CodeGen/Thumb/thumb_jump24_fixup.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple thumbv7-none-linux-gnueabi -mcpu=cortex-a8 -march=thumb -mattr=thumb2 -filetype=obj -o - < %s | llvm-objdump -r - | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64"
+target triple = "thumbv7-none-linux-gnueabi"
+
+define i32 @test_fixup_t2_uncondbranch() {
+b0:
+  invoke void @__cxa_throw(i8* null, i8* null, i8* null) noreturn
+          to label %unreachable unwind label %lpad
+
+; CHECK: {{[0-9]+}} R_ARM_THM_JUMP24 __cxa_throw
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+  ret i32 0
+
+unreachable:
+  unreachable
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
diff --git a/test/CodeGen/Thumb2/longMACt.ll b/test/CodeGen/Thumb2/longMACt.ll
new file mode 100644
index 0000000000..beefd6044c
--- /dev/null
+++ b/test/CodeGen/Thumb2/longMACt.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; Check generated signed and unsigned multiply accumulate long.
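+; For reference, the underlying instruction semantics (register names are
+; illustrative): umlal rLo, rHi, rN, rM accumulates zext(rN) * zext(rM) into
+; the 64-bit pair rHi:rLo, and smlal is the same with sign extension; that is
+; why the zext tests below expect umlal and the sext tests expect smlal.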
+
+define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
+;CHECK: MACLongTest1:
+;CHECK: umlal
+  %conv = zext i32 %a to i64
+  %conv1 = zext i32 %b to i64
+  %mul = mul i64 %conv1, %conv
+  %add = add i64 %mul, %c
+  ret i64 %add
+}
+
+define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
+;CHECK: MACLongTest2:
+;CHECK: smlal
+  %conv = sext i32 %a to i64
+  %conv1 = sext i32 %b to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %add = add nsw i64 %mul, %c
+  ret i64 %add
+}
+
+define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
+;CHECK: MACLongTest3:
+;CHECK: umlal
+  %conv = zext i32 %b to i64
+  %conv1 = zext i32 %a to i64
+  %mul = mul i64 %conv, %conv1
+  %conv2 = zext i32 %c to i64
+  %add = add i64 %mul, %conv2
+  ret i64 %add
+}
+
+define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
+;CHECK: MACLongTest4:
+;CHECK: smlal
+  %conv = sext i32 %b to i64
+  %conv1 = sext i32 %a to i64
+  %mul = mul nsw i64 %conv, %conv1
+  %conv2 = sext i32 %c to i64
+  %add = add nsw i64 %mul, %conv2
+  ret i64 %add
+}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ead198f216..ed4d26d746 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -5,7 +5,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: mvn r0, #-2147483648
 ; CHECK: cmp r2, #10
 ; CHECK: it le
-; CHECK: addle.w r1, r1, r0
+; CHECK: addle r1, r0
 ; CHECK: mov r0, r1
   %tmp1 = icmp sgt i32 %c, 10
   %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
@@ -30,7 +30,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK: t3
 ; CHECK: cmp r2, #10
 ; CHECK: it le
-; CHECK: suble.w r1, r1, #10
+; CHECK: suble r1, #10
 ; CHECK: mov r0, r1
   %tmp1 = icmp sgt i32 %c, 10
   %tmp2 = select i1 %tmp1, i32 0, i32 10
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 9a66b670c7..04659522d3 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -5,7 +5,7 @@
 ; It's hard to test for the ISEL condition because CodeGen optimizes
 ; away the bugpointed code. Just ensure the basics are still there.
 ;CHECK: func:
-;CHECK: vpxor
+;CHECK: vxorps
 ;CHECK: vinsertf128
 ;CHECK: vpshufd
 ;CHECK: vpshufd
diff --git a/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll b/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll
new file mode 100644
index 0000000000..6ebbb2e97d
--- /dev/null
+++ b/test/CodeGen/X86/2012-08-28-UnsafeMathCrash.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -enable-unsafe-fp-math
+; <rdar://problem/12180135>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+define i32 @foo(float %mean) nounwind readnone ssp align 2 {
+entry:
+  %cmp = fcmp olt float %mean, -3.000000e+00
+  %f.0 = select i1 %cmp, float -3.000000e+00, float %mean
+  %cmp2 = fcmp ult float %f.0, 3.000000e+00
+  %f.1 = select i1 %cmp2, float %f.0, float 0x4007EB8520000000
+  %add = fadd float %f.1, 3.000000e+00
+  %div = fdiv float %add, 2.343750e-02
+  %0 = fpext float %div to double
+  %conv = select i1 undef, double 2.550000e+02, double %0
+  %add8 = fadd double %conv, 5.000000e-01
+  %conv9 = fptosi double %add8 to i32
+  %.conv9 = select i1 undef, i32 255, i32 %conv9
+  ret i32 %.conv9
+}
diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll
new file mode 100644
index 0000000000..26ed0ecc0c
--- /dev/null
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -0,0 +1,272 @@
+; RUN: llc -mcpu=corei7 -no-stack-coloring=false < %s | FileCheck %s --check-prefix=YESCOLOR
+; RUN: llc -mcpu=corei7 -no-stack-coloring=true < %s | FileCheck %s --check-prefix=NOCOLOR
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;YESCOLOR: subq $136, %rsp
+;NOCOLOR: subq $264, %rsp
+
+
+define i32 @myCall_w2(i32 %in) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b)
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  call void @llvm.lifetime.end(i64 -1, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+}
+
+
+;YESCOLOR: subq $272, %rsp
+;NOCOLOR: subq $272, %rsp
+
+define i32 @myCall2_no_merge(i32 %in, i1 %d) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b)
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  call void @llvm.lifetime.end(i64 -1, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  ret i32 %t7
+bb3:
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  ret i32 0
+}
+
+;YESCOLOR: subq $144, %rsp
+;NOCOLOR: subq $272, %rsp
+
+define i32 @myCall2_w2(i32 %in, i1 %d) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b)
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  call void @llvm.lifetime.end(i64 -1, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+bb3:
+  ret i32 0
+}
+
+;YESCOLOR: subq $208, %rsp
+;NOCOLOR: subq $400, %rsp
+
+
+
+
+define i32 @myCall_w4(i32 %in) {
+entry:
+  %a1 = alloca [14 x i8*], align 8
+  %a2 = alloca [13 x i8*], align 8
+  %a3 = alloca [12 x i8*], align 8
+  %a4 = alloca [11 x i8*], align 8
+  %b1 = bitcast [14 x i8*]* %a1 to i8*
+  %b2 = bitcast [13 x i8*]* %a2 to i8*
+  %b3 = bitcast [12 x i8*]* %a3 to i8*
+  %b4 = bitcast [11 x i8*]* %a4 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b4)
+  call void @llvm.lifetime.start(i64 -1, i8* %b1)
+  %t1 = call i32 @foo(i32 %in, i8* %b1)
+  %t2 = call i32 @foo(i32 %in, i8* %b1)
+  call void @llvm.lifetime.end(i64 -1, i8* %b1)
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t9 = call i32 @foo(i32 %in, i8* %b2)
+  %t8 = call i32 @foo(i32 %in, i8* %b2)
+  call void @llvm.lifetime.end(i64 -1, i8* %b2)
+  call void @llvm.lifetime.start(i64 -1, i8* %b3)
+  %t3 = call i32 @foo(i32 %in, i8* %b3)
+  %t4 = call i32 @foo(i32 %in, i8* %b3)
+  call void @llvm.lifetime.end(i64 -1, i8* %b3)
+  %t11 = call i32 @foo(i32 %in, i8* %b4)
+  call void @llvm.lifetime.end(i64 -1, i8* %b4)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+}
+
+;YESCOLOR: subq $112, %rsp
+;NOCOLOR: subq $400, %rsp
+
+define i32 @myCall2_w4(i32 %in) {
+entry:
+  %a1 = alloca [14 x i8*], align 8
+  %a2 = alloca [13 x i8*], align 8
+  %a3 = alloca [12 x i8*], align 8
+  %a4 = alloca [11 x i8*], align 8
+  %b1 = bitcast [14 x i8*]* %a1 to i8*
+  %b2 = bitcast [13 x i8*]* %a2 to i8*
+  %b3 = bitcast [12 x i8*]* %a3 to i8*
+  %b4 = bitcast [11 x i8*]* %a4 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b1)
+  %t1 = call i32 @foo(i32 %in, i8* %b1)
+  %t2 = call i32 @foo(i32 %in, i8* %b1)
+  call void @llvm.lifetime.end(i64 -1, i8* %b1)
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t9 = call i32 @foo(i32 %in, i8* %b2)
+  %t8 = call i32 @foo(i32 %in, i8* %b2)
+  call void @llvm.lifetime.end(i64 -1, i8* %b2)
+  call void @llvm.lifetime.start(i64 -1, i8* %b3)
+  %t3 = call i32 @foo(i32 %in, i8* %b3)
+  %t4 = call i32 @foo(i32 %in, i8* %b3)
+  call void @llvm.lifetime.end(i64 -1, i8* %b3)
+  br i1 undef, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start(i64 -1, i8* %b4)
+  %t11 = call i32 @foo(i32 %in, i8* %b4)
+  call void @llvm.lifetime.end(i64 -1, i8* %b4)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+bb3:
+  ret i32 0
+}
+
+
+;YESCOLOR: subq $144, %rsp
+;NOCOLOR: subq $272, %rsp
+
+
+define i32 @myCall2_noend(i32 %in, i1 %d) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b)
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+bb3:
+  ret i32 0
+}
+
+;YESCOLOR: subq $144, %rsp
+;NOCOLOR: subq $272, %rsp
+define i32 @myCall2_noend2(i32 %in, i1 %d) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %b)
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+bb3:
+  ret i32 0
+}
+
+
+;YESCOLOR: subq $144, %rsp
+;NOCOLOR: subq $272, %rsp
+define i32 @myCall2_nostart(i32 %in, i1 %d) {
+entry:
+  %a = alloca [17 x i8*], align 8
+  %a2 = alloca [16 x i8*], align 8
+  %b = bitcast [17 x i8*]* %a to i8*
+  %b2 = bitcast [16 x i8*]* %a2 to i8*
+  %t1 = call i32 @foo(i32 %in, i8* %b)
+  %t2 = call i32 @foo(i32 %in, i8* %b)
+  call void @llvm.lifetime.end(i64 -1, i8* %b)
+  br i1 %d, label %bb2, label %bb3
+bb2:
+  call void @llvm.lifetime.start(i64 -1, i8* %b2)
+  %t3 = call i32 @foo(i32 %in, i8* %b2)
+  %t4 = call i32 @foo(i32 %in, i8* %b2)
+  %t5 = add i32 %t1, %t2
+  %t6 = add i32 %t3, %t4
+  %t7 = add i32 %t5, %t6
+  ret i32 %t7
+bb3:
+  ret i32 0
+}
+
+; Adopt the test from Transforms/Inline/array_merge.ll
+;YESCOLOR: subq $816, %rsp
+;NOCOLOR: subq $1616, %rsp
+define void @array_merge() nounwind ssp {
+entry:
+  %A.i1 = alloca [100 x i32], align 4
+  %B.i2 = alloca [100 x i32], align 4
+  %A.i = alloca [100 x i32], align 4
+  %B.i = alloca [100 x i32], align 4
+  %0 = bitcast [100 x i32]* %A.i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
+  %1 = bitcast [100 x i32]* %B.i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind
+  call void @bar([100 x i32]* %A.i, [100 x i32]* %B.i) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind
+  %2 = bitcast [100 x i32]* %A.i1 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %2) nounwind
+  %3 = bitcast [100 x i32]* %B.i2 to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %3) nounwind
+  call void @bar([100 x i32]* %A.i1, [100 x i32]* %B.i2) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %2) nounwind
+  call void @llvm.lifetime.end(i64 -1, i8* %3) nounwind
+  ret void
+}
+
+declare void @bar([100 x i32]*, [100 x i32]*) nounwind
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+declare i32 @foo(i32, i8*)
+
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll
new file mode 100644
index 0000000000..e7c9605d3e
--- /dev/null
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+
+define i32 @test_get_quotient(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_quotient
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: ret
+; CHECK: divb
+; CHECK: ret
+  %result = sdiv i32 %a, %b
+  ret i32 %result
+}
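+; The guard matched in these tests appears to OR copies of the two operands
+; together and test the result against $-256 (0xFFFFFF00): if neither value
+; has bits set above the low byte, the cheap 8-bit divb is used instead of
+; the full 32-bit idivl. Roughly (label name assumed for illustration):
+;   orl   %ecx, %edx
+;   testl $-256, %edx
+;   je    .Ldiv8          ; both operands fit in 8 bits -> use divb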
+
+define i32 @test_get_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_remainder
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: ret
+; CHECK: divb
+; CHECK: ret
+  %result = srem i32 %a, %b
+  ret i32 %result
+}
+
+define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_quotient_and_remainder
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: divb
+; CHECK: addl
+; CHECK: ret
+; CHECK-NOT: idivl
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, %b
+  %resultrem = srem i32 %a, %b
+  %result = add i32 %resultdiv, %resultrem
+  ret i32 %result
+}
+
+define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
+; CHECK: test_use_div_and_idiv
+; CHECK: idivl
+; CHECK: divb
+; CHECK: divl
+; CHECK: divb
+; CHECK: addl
+; CHECK: ret
+  %resultidiv = sdiv i32 %a, %b
+  %resultdiv = udiv i32 %a, %b
+  %result = add i32 %resultidiv, %resultdiv
+  ret i32 %result
+}
+
+define i32 @test_use_div_imm_imm() nounwind {
+; CHECK: test_use_div_imm_imm
+; CHECK: movl $64
+  %resultdiv = sdiv i32 256, 4
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_div_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_div_reg_imm
+; CHECK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, 33
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_rem_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_rem_reg_imm
+; CHECK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultrem = srem i32 %a, 33
+  ret i32 %resultrem
+}
+
+define i32 @test_use_divrem_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_divrem_reg_imm
+; CHECK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, 33
+  %resultrem = srem i32 %a, 33
+  %result = add i32 %resultdiv, %resultrem
+  ret i32 %result
+}
+
+define i32 @test_use_div_imm_reg(i32 %a) nounwind {
+; CHECK: test_use_div_imm_reg
+; CHECK: test
+; CHECK: idiv
+; CHECK: divb
+  %resultdiv = sdiv i32 4, %a
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_rem_imm_reg(i32 %a) nounwind {
+; CHECK: test_use_rem_imm_reg
+; CHECK: test
+; CHECK: idiv
+; CHECK: divb
+  %resultrem = srem i32 4, %a
+  ret i32 %resultrem
+}
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 9b41709a3b..ec11654b35 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -229,9 +229,8 @@ define <8 x float> @test17(<4 x float> %y) {
 }
 
 ; CHECK: test18
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovshdup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
@@ -239,9 +238,8 @@ define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
 }
 
 ; CHECK: test19
-; CHECK: vshufps
-; CHECK: vshufps
-; CHECK: vunpcklps
+; CHECK: vmovsldup
+; CHECK: vblendps
 ; CHECK: ret
 define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
   %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index fe0f6caed3..ff56a45499 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -19,12 +19,12 @@ entry:
 }
 
 ; CHECK: @t0
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NOT: vmovaps %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
+; CHECK: vextractf128 $1, %ymm0, (%rdi)
 define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
 entry:
-  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 1)
   %1 = bitcast float* %addr to <4 x float>*
   store <4 x float> %0, <4 x float>* %1, align 16
   ret void
@@ -32,27 +32,13 @@ entry:
 
 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
 
-; CHECK: @t1
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
-; CHECK-NOT: vmovups %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
-define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp {
-entry:
-  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
-  %1 = bitcast float* %addr to i8*
-  tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0)
-  ret void
-}
-
-declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
-
 ; CHECK: @t2
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NOT: vmovaps %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
+; CHECK: vextractf128 $1, %ymm0, (%rdi)
 define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
 entry:
-  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 1)
   %1 = bitcast double* %addr to <2 x double>*
   store <2 x double> %0, <2 x double>* %1, align 16
   ret void
@@ -60,28 +46,14 @@ entry:
 
 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
 
-; CHECK: @t3
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
-; CHECK-NOT: vmovups %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
-define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp {
-entry:
-  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
-  %1 = bitcast double* %addr to i8*
-  tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0)
-  ret void
-}
-
-declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
-
 ; CHECK: @t4
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
 ; CHECK-NOT: vmovaps %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
+; CHECK: vextractf128 $1, %ymm0, (%rdi)
 define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
 entry:
   %0 = bitcast <4 x i64> %a to <8 x i32>
-  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 1)
   %2 = bitcast <4 x i32> %1 to <2 x i64>
   store <2 x i64> %2, <2 x i64>* %addr, align 16
   ret void
@@ -90,17 +62,43 @@ entry:
 
 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
 
 ; CHECK: @t5
-; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
-; CHECK-NOT: vmovdqu %xmm0, (%rdi)
-; CHECK: vextractf128 $0, %ymm0, (%rdi)
-define void @t5(<2 x i64>* %addr, <4 x i64> %a) nounwind uwtable ssp {
+; CHECK: vmovaps %xmm0, (%rdi)
+define void @t5(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+}
+
+; CHECK: @t6
+; CHECK: vmovaps %xmm0, (%rdi)
+define void @t6(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to <2 x double>*
+  store <2 x double> %0, <2 x double>* %1, align 16
+  ret void
+}
+
+; CHECK: @t7
+; CHECK: vmovaps %xmm0, (%rdi)
+define void @t7(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
 entry:
   %0 = bitcast <4 x i64> %a to <8 x i32>
   %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
-  %2 = bitcast <2 x i64>* %addr to i8*
-  %3 = bitcast <4 x i32> %1 to <16 x i8>
-  tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3)
+  %2 = bitcast <4 x i32> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %addr, align 16
   ret void
 }
 
-declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+; CHECK: @t8
+; CHECK: vmovups %xmm0, (%rdi)
+define void @t8(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <4 x i32> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %addr, align 1
+  ret void
+}
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index c5899fa274..8af9373e50 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -26,3 +26,14 @@ entry:
   %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i16> %shuffle.i
 }
+
+; CHECK: vpshufb_test
+; CHECK: vpshufb {{.*\(%r.*}}, %ymm
+; CHECK: ret
+define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
+  %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
+                                                                i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,
+                                                                i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25,
+                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
+  ret <32 x i8> %S
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index d8f4663c94..85a70aad75 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
@@ -197,6 +198,11 @@ block2:
 ; CHECK: cvtsi2sdq {{.*}} %xmm0
 ; CHECK: movb $1, %al
 ; CHECK: callq _test16callee
+
+; AVX: movabsq $1
+; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
+; AVX: movb $1, %al
+; AVX: callq _test16callee
   call void (...)* @test16callee(double 1.000000e+00)
   ret void
 }
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
index b0c1d0a0dd..bd3514cc3f 100644
--- a/test/CodeGen/X86/fma.ll
+++ b/test/CodeGen/X86/fma.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
+; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK-FMA-INST
+; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK-FMA-CALL
 
 ; CHECK: test_f32
 ; CHECK-FMA-INST: vfmadd213ss
-; CHECK-FMA-CALL: _fmaf
+; CHECK-FMA-CALL: fmaf
 
 define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
 entry:
@@ -15,7 +17,7 @@ entry:
 
 ; CHECK: test_f64
 ; CHECK-FMA-INST: vfmadd213sd
-; CHECK-FMA-CALL: _fma
+; CHECK-FMA-CALL: fma
 
 define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
 entry:
@@ -24,7 +26,7 @@ entry:
 }
 
 ; CHECK: test_f80
-; CHECK: _fmal
+; CHECK: fmal
 
 define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
index 90529e09d7..e3910a6935 100755
--- a/test/CodeGen/X86/fma3-intrinsics.ll
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma,+fma4 | FileCheck %s
+; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
 
 define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; CHECK: fmadd213ss %xmm
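+; A note on the two families exercised by these FMA tests (the digits in the
+; FMA3 mnemonics name Intel-syntax operand positions): FMA3 is destructive,
+; e.g. with the AT&T operand order printed in the checks,
+;   vfmadd213ss %xmm2, %xmm1, %xmm0      ; xmm0 = (xmm1 * xmm0) + xmm2
+; while FMA4 takes a separate destination register:
+;   vfmaddss %xmm2, %xmm1, %xmm0, %xmm0  ; xmm0 = (xmm0 * xmm1) + xmm2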
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index fd414b346e..2fe1ecd40e 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
 
 ; VFMADD
 define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
index 5d97a87b3b..6d98d59b38 100644
--- a/test/CodeGen/X86/fma_patterns.ll
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -1,8 +1,13 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
 
 ; CHECK: test_x86_fmadd_ps
-; CHECK: vfmadd213ps %xmm2, %xmm0, %xmm1
+; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps
+; CHECK_FMA4: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   %x = fmul <4 x float> %a0, %a1
   %res = fadd <4 x float> %x, %a2
@@ -10,8 +15,11 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
 }
 
 ; CHECK: test_x86_fmsub_ps
-; CHECK: fmsub213ps %xmm2, %xmm0, %xmm1
+; CHECK: fmsub213ps %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps
+; CHECK_FMA4: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   %x = fmul <4 x float> %a0, %a1
   %res = fsub <4 x float> %x, %a2
@@ -19,8 +27,11 @@ define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl
 }
 
 ; CHECK: test_x86_fnmadd_ps
-; CHECK: fnmadd213ps %xmm2, %xmm0, %xmm1
+; CHECK: fnmadd213ps %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps
+; CHECK_FMA4: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   %x = fmul <4 x float> %a0, %a1
   %res = fsub <4 x float> %a2, %x
@@ -28,8 +39,11 @@ define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl
 }
 
 ; CHECK: test_x86_fnmsub_ps
-; CHECK: fnmsub213ps %xmm2, %xmm0, %xmm1
+; CHECK: fnmsub213ps %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmsub_ps
+; CHECK_FMA4: fnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   %x = fmul <4 x float> %a0, %a1
   %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
@@ -38,8 +52,11 @@ define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x fl
 }
 
 ; CHECK: test_x86_fmadd_ps_y
-; CHECK: vfmadd213ps %ymm2, %ymm0, %ymm1
+; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps_y
+; CHECK_FMA4: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4: ret
 define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   %x = fmul <8 x float> %a0, %a1
   %res = fadd <8 x float> %x, %a2
@@ -47,8 +64,11 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
 }
 
 ; CHECK: test_x86_fmsub_ps_y
-; CHECK: vfmsub213ps %ymm2, %ymm0, %ymm1
+; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps_y
+; CHECK_FMA4: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4: ret
 define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   %x = fmul <8 x float> %a0, %a1
   %res = fsub <8 x float> %x, %a2
@@ -56,8 +76,11 @@ define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
 }
 
 ; CHECK: test_x86_fnmadd_ps_y
-; CHECK: vfnmadd213ps %ymm2, %ymm0, %ymm1
+; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps_y
+; CHECK_FMA4: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4: ret
 define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   %x = fmul <8 x float> %a0, %a1
   %res = fsub <8 x float> %a2, %x
@@ -65,7 +88,7 @@ define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x
 }
 
 ; CHECK: test_x86_fnmsub_ps_y
-; CHECK: vfnmsub213ps %ymm2, %ymm0, %ymm1
+; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0
 ; CHECK: ret
 define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   %x = fmul <8 x float> %a0, %a1
@@ -75,8 +98,11 @@ define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x
 }
 
 ; CHECK: test_x86_fmadd_pd_y
-; CHECK: vfmadd213pd %ymm2, %ymm0, %ymm1
+; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_pd_y
+; CHECK_FMA4: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4: ret
 define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   %x = fmul <4 x double> %a0, %a1
   %res = fadd <4 x double> %x, %a2
@@ -84,8 +110,11 @@ define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4
 }
 
 ; CHECK: test_x86_fmsub_pd_y
-; CHECK: vfmsub213pd %ymm2, %ymm0, %ymm1
+; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd_y
+; CHECK_FMA4: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4: ret
 define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   %x = fmul <4 x double> %a0, %a1
   %res = fsub <4 x double> %x, %a2
@@ -93,8 +122,11 @@ define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4
 }
 
 ; CHECK: test_x86_fmsub_pd
-; CHECK: vfmsub213pd %xmm2, %xmm0, %xmm1
+; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd
+; CHECK_FMA4: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
   %x = fmul <2 x double> %a0, %a1
   %res = fsub <2 x double> %x, %a2
@@ -102,8 +134,11 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x
 }
 
 ; CHECK: test_x86_fnmadd_ss
-; CHECK: vfnmadd213ss %xmm2, %xmm0, %xmm1
+; CHECK: vfnmadd213ss %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ss
+; CHECK_FMA4: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
   %x = fmul float %a0, %a1
   %res = fsub float %a2, %x
@@ -111,8 +146,11 @@ define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
 }
 
 ; CHECK: test_x86_fnmadd_sd
-; CHECK: vfnmadd213sd %xmm2, %xmm0, %xmm1
+; CHECK: vfnmadd213sd %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_sd
+; CHECK_FMA4: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
   %x = fmul double %a0, %a1
   %res = fsub double %a2, %x
@@ -120,8 +158,11 @@ define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
 }
 
 ; CHECK: test_x86_fmsub_sd
-; CHECK: vfmsub213sd %xmm2, %xmm0, %xmm1
+; CHECK: vfmsub213sd %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_sd
+; CHECK_FMA4: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
@@ -129,11 +170,43 @@ define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
 }
 
 ; CHECK: test_x86_fnmsub_ss
-; CHECK: vfnmsub213ss %xmm2, %xmm0, %xmm1
+; CHECK: vfnmsub213ss %xmm2, %xmm1, %xmm0
 ; CHECK: ret
+; CHECK_FMA4: test_x86_fnmsub_ss
+; CHECK_FMA4: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4: ret
 define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
   %x = fsub float -0.000000e+00, %a0
   %y = fmul float %x, %a1
   %res = fsub float %y, %a2
   ret float %res
 }
+
+; CHECK: test_x86_fmadd_ps
+; CHECK: vmovaps (%rdi), %xmm2
+; CHECK: vfmadd213ps %xmm1, %xmm0, %xmm2
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps
+; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; CHECK_FMA4: ret
+define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = load <4 x float>* %a0
+  %y = fmul <4 x float> %x, %a1
+  %res = fadd <4 x float> %y, %a2
+  ret <4 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps
+; CHECK: vmovaps (%rdi), %xmm2
+; CHECK: fmsub213ps %xmm1, %xmm0, %xmm2
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps
+; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
+; CHECK_FMA4: ret
+define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+  %x = load <4 x float>* %a0
+  %y = fmul <4 x float> %x, %a1
+  %res = fsub <4 x float> %y, %a2
+  ret <4 x float> %res
+}
+
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
new file mode 100644
index 0000000000..091f0de930
--- /dev/null
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=x86-64 -mattr=-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
+
+; CHECK: test1
+define float @test1(float %a) {
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fadd float %a, %a
+  %r = fadd float %t1, %t1
+  ret float %r
+}
+
+; CHECK: test2
+define float @test2(float %a) {
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fmul float 4.0, %a
+  %t2 = fadd float %a, %a
+  %r = fadd float %t1, %t2
+  ret float %r
+}
+
+; CHECK: test3
+define float @test3(float %a) {
+; CHECK-NOT: addss
+; CHECK: xorps
+; CHECK-NOT: addss
+; CHECK: ret
+  %t1 = fmul float 2.0, %a
+  %t2 = fadd float %a, %a
+  %r = fsub float %t1, %t2
+  ret float %r
+}
+
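+; Worked out, the three tests above are pure reassociation under unsafe
+; fp-math: test1 computes (a+a)+(a+a) = 4*a, test2 computes 4*a + (a+a) =
+; 6*a, and test3 computes 2*a - (a+a) = 0, which needs no arithmetic at all
+; and is materialized as a zero with xorps.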
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 91576fb09e..597236e362 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -19,3 +19,12 @@ entry:
   %1 = load i64* %retval ; <i64> [#uses=1]
   ret i64 %1
 }
+
+; The tied operands are not necessarily in the same order as the defs.
+; PR13742
+define i64 @swapped(i64 %x, i64 %y) nounwind {
+entry:
+  %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
+  %x1 = extractvalue { i64, i64 } %x0, 0
+  ret i64 %x1
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 984d7e57e0..51320dd6d0 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,14 +1,10 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
-; XFAIL: *
 ; rdar://5571034
 
 ; This requires physreg joining, %vreg13 is live everywhere:
 ; 304L %CL<def> = COPY %vreg13:sub_8bit; GR32_ABCD:%vreg13
 ; 320L %vreg15<def> = COPY %vreg19; GR32:%vreg15 GR32_NOSP:%vreg19
 ; 336L %vreg15<def> = SAR32rCL %vreg15, %EFLAGS<imp-def,dead>, %CL<imp-use,kill>; GR32:%vreg15
-;
-; This test is XFAIL until the register allocator understands trivial physreg
-; interference. <rdar://9802098>
 
 define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
 ; CHECK: foo:
diff --git a/test/CodeGen/X86/pr12312.ll b/test/CodeGen/X86/pr12312.ll
new file mode 100644
index 0000000000..84102f148b
--- /dev/null
+++ b/test/CodeGen/X86/pr12312.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse41,-avx < %s | FileCheck %s --check-prefix SSE41
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix AVX
+
+define i32 @veccond(<4 x i32> %input) {
+entry:
+  %0 = bitcast <4 x i32> %input to i128
+  %1 = icmp ne i128 %0, 0
+  br i1 %1, label %if-true-block, label %endif-block
+
+if-true-block: ; preds = %entry
+  ret i32 0
+endif-block: ; preds = %entry,
+  ret i32 1
+; SSE41: veccond
+; SSE41: ptest
+; SSE41: ret
+; AVX: veccond
+; AVX: vptest
+; AVX: ret
+}
+
+define i32 @vectest(<4 x i32> %input) {
+entry:
+  %0 = bitcast <4 x i32> %input to i128
+  %1 = icmp ne i128 %0, 0
+  %2 = zext i1 %1 to i32
+  ret i32 %2
+; SSE41: vectest
+; SSE41: ptest
+; SSE41: ret
+; AVX: vectest
+; AVX: vptest
+; AVX: ret
+}
+
+define i32 @vecsel(<4 x i32> %input, i32 %a, i32 %b) {
+entry:
+  %0 = bitcast <4 x i32> %input to i128
+  %1 = icmp ne i128 %0, 0
+  %2 = select i1 %1, i32 %a, i32 %b
+  ret i32 %2
+; SSE41: vecsel
+; SSE41: ptest
+; SSE41: ret
+; AVX: vecsel
+; AVX: vptest
+; AVX: ret
+}
diff --git a/test/CodeGen/X86/pr12359.ll b/test/CodeGen/X86/pr12359.ll
new file mode 100644
index 0000000000..024b163fa7
--- /dev/null
+++ b/test/CodeGen/X86/pr12359.ll
@@ -0,0 +1,10 @@
+; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s
+define <16 x i8> @shuf(<16 x i8> %inval1) {
+entry:
+  %0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4, i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4>
+  ret <16 x i8> %0
+; CHECK: shuf
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pshufb
+; CHECK-NEXT: ret
+}
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index 51c3d2363f..b823f0af2c 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -76,12 +76,12 @@ entry:
 
 ; X32: f5:
 ; X32: leal {{[jk]}}@TLSLDM(%ebx)
-; X32-NEXT: calll ___tls_get_addr@PLT
-; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
-; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
+; X32: calll ___tls_get_addr@PLT
+; X32: movl {{[jk]}}@DTPOFF(%e
+; X32: addl {{[jk]}}@DTPOFF(%e
 ; X64: f5:
 ; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi
-; X64-NEXT: callq __tls_get_addr@PLT
-; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
movl {{[jk]}}@DTPOFF(%rax)
-; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)
+; X64: callq __tls_get_addr@PLT
+; X64: movl {{[jk]}}@DTPOFF(%r
+; X64: addl {{[jk]}}@DTPOFF(%r
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
index 086af6bb11..8dfc2eab41 100644
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ b/test/CodeGen/X86/vec_shuffle-26.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=sse41 -o %t
-; RUN: grep unpcklps %t | count 1
-; RUN: grep unpckhps %t | count 3
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom -mattr=+sse41 | FileCheck -check-prefix=ATOM %s
 ; Transpose example using the more generic vector shuffle. Return float8
 ; instead of float16
@@ -14,6 +13,17 @@ target triple = "i386-apple-cl.1.0"
 define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
 entry:
+; CHECK: transpose2
+; CHECK: unpckhps
+; CHECK: unpckhps
+; CHECK: unpcklps
+; CHECK: unpckhps
+; Different instruction order for Atom.
+; ATOM: transpose2
+; ATOM: unpckhps
+; ATOM: unpckhps
+; ATOM: unpckhps
+; ATOM: unpcklps
 %unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
 %unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=2]
 %unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=2]
@@ -27,3 +37,32 @@ entry:
 ; %r3 = shufflevector <8 x float> %r1, <8 x float> %r2, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >;
 ret <8 x float> %r2
}
+
+define <2 x i64> @lo_hi_shift(float* nocapture %x, float* nocapture %y) nounwind {
+entry:
+; movhps should happen before extractps to ensure it gets the correct value.
+; CHECK: lo_hi_shift +; CHECK: movhps ([[BASEREG:%[a-z]+]]), +; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: lo_hi_shift +; ATOM: movhps ([[BASEREG:%[a-z]+]]), +; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) + %v.i = bitcast float* %y to <4 x float>* + %0 = load <4 x float>* %v.i, align 1 + %1 = bitcast float* %x to <1 x i64>* + %.val = load <1 x i64>* %1, align 1 + %2 = bitcast <1 x i64> %.val to <2 x float> + %shuffle.i = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %shuffle1.i = shufflevector <4 x float> %0, <4 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + %cast.i = bitcast <4 x float> %0 to <2 x i64> + %extract.i = extractelement <2 x i64> %cast.i, i32 1 + %3 = bitcast float* %x to i64* + store i64 %extract.i, i64* %3, align 4 + %4 = bitcast <4 x float> %0 to <16 x i8> + %5 = bitcast <4 x float> %shuffle1.i to <16 x i8> + %palignr = shufflevector <16 x i8> %5, <16 x i8> %4, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> + %6 = bitcast <16 x i8> %palignr to <2 x i64> + ret <2 x i64> %6 +} diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll index 9705d149dd..dfaa3d6dc9 100644 --- a/test/CodeGen/X86/widen_load-1.ll +++ b/test/CodeGen/X86/widen_load-1.ll @@ -1,12 +1,17 @@ -; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +; RUN: llc %s -o - -march=x86-64 -mattr=-avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=SSE +; RUN: llc %s -o - -march=x86-64 -mattr=+avx -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=AVX ; PR4891 ; PR5626 ; This load should be before the call, not after. -; CHECK: movaps compl+128(%rip), %xmm0 -; CHECK: movaps %xmm0, (%rsp) -; CHECK: callq killcommon +; SSE: movaps compl+128(%rip), %xmm0 +; SSE: movaps %xmm0, (%rsp) +; SSE: callq killcommon + +; AVX: vmovapd compl+128(%rip), %xmm0 +; AVX: vmovapd %xmm0, (%rsp) +; AVX: callq killcommon @compl = linkonce global [20 x i64] zeroinitializer, align 64 ; <[20 x i64]*> [#uses=1] diff --git a/test/DebugInfo/2010-04-13-PubType.ll b/test/DebugInfo/2010-04-13-PubType.ll index db7bb0ad60..559f032cb3 100644 --- a/test/DebugInfo/2010-04-13-PubType.ll +++ b/test/DebugInfo/2010-04-13-PubType.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -asm-verbose < %s > %t -; RUN: grep "External Name" %t | grep -v X -; RUN: grep "External Name" %t | grep Y | count 1 +; RUN: llc -O0 -asm-verbose -mtriple=x86_64-macosx < %s | FileCheck %s +; CHECK-NOT: .asciz "X" ## External Name +; CHECK: .asciz "Y" ## External Name ; Test to check type with no definition is listed in pubtypes section. 
%struct.X = type opaque
%struct.Y = type { i32 }
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
Binary files differ
new file mode 100755
index 0000000000..9a1d5383ca
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index a22707189b..58fb055736 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -7,16 +7,15 @@
 ; first check that we have a TAG_subprogram at a given offset and it has
 ; AT_inline.
-; CHECK: 0x00000134: DW_TAG_subprogram [18]
-; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK: 0x0000011e: DW_TAG_subprogram [18]
 ; CHECK-NEXT: DW_AT_specification
 ; CHECK-NEXT: DW_AT_inline
 ; and then that a TAG_subprogram refers to it with AT_abstract_origin.
-; CHECK: 0x00000184: DW_TAG_subprogram [20]
-; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x0134 => {0x00000134})
+; CHECK: 0x0000015f: DW_TAG_subprogram [20]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x011e => {0x0000011e})
 define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
 entry:
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 2cd100156a..caf12c2756 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -16,8 +16,8 @@
 ; Verify that we refer to 'yyyy' with a relocation.
 ; LINUX: .long .Lstring3 # DW_AT_name
-; LINUX-NEXT: .long 39 # DW_AT_type
-; LINUX-NEXT: .byte 1 # DW_AT_external
+; LINUX-NEXT: .long 38 # DW_AT_type
+; LINUX-NEXT: # DW_AT_external
 ; LINUX-NEXT: .byte 1 # DW_AT_decl_file
 ; LINUX-NEXT: .byte 1 # DW_AT_decl_line
 ; LINUX-NEXT: .byte 9 # DW_AT_location
diff --git a/test/DebugInfo/dwarfdump-inlining.test b/test/DebugInfo/dwarfdump-inlining.test
new file mode 100644
index 0000000000..d3a7e12a87
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-inlining.test
@@ -0,0 +1,28 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x613 \
+RUN:   --inlining --functions | FileCheck %s -check-prefix DEEP_STACK
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x6de \
+RUN:   --inlining | FileCheck %s -check-prefix SHORTER_STACK
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x685 \
+RUN:   --inlining | FileCheck %s -check-prefix SHORT_STACK
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-inl-test.elf-x86-64 --address=0x640 \
+RUN:   --functions | FileCheck %s -check-prefix INL_FUNC_NAME
+
+DEEP_STACK: inlined_h
+DEEP_STACK-NEXT: header.h:2:21
+DEEP_STACK-NEXT: inlined_g
+DEEP_STACK-NEXT: header.h:7
+DEEP_STACK-NEXT: inlined_f
+DEEP_STACK-NEXT: main.cc:3
+DEEP_STACK-NEXT: main
+DEEP_STACK-NEXT: main.cc:8
+
+SHORTER_STACK: header.h:7:20
+SHORTER_STACK-NEXT: main.cc:3
+SHORTER_STACK-NEXT: main.cc:8
+
+SHORT_STACK: main.cc:3:20
+SHORT_STACK-NEXT: main.cc:8
+
+INL_FUNC_NAME: inlined_g
+INL_FUNC_NAME-NEXT: header.h:7:20
+
diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test
index de23dcd9c2..973c3447e3 100644
--- a/test/DebugInfo/dwarfdump-test.test
+++ b/test/DebugInfo/dwarfdump-test.test
@@ -17,6 +17,8 @@
 RUN:   --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
 RUN:   --address=0x55c --functions \
 RUN:   | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
+RUN:   | FileCheck %s -check-prefix 
DEBUG_RANGES MAIN: main MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10 @@ -44,3 +46,11 @@ INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0 MANY_SEQ_IN_LINE_TABLE: _Z1cv MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0 + +DEBUG_RANGES: .debug_ranges contents: +DEBUG_RANGES-NEXT: 00000000 000000000000055c 0000000000000567 +DEBUG_RANGES-NEXT: 00000000 0000000000000567 000000000000056d +DEBUG_RANGES-NEXT: 00000000 <End of list> +DEBUG_RANGES-NEXT: 00000030 0000000000000570 000000000000057b +DEBUG_RANGES-NEXT: 00000030 0000000000000567 000000000000056d +DEBUG_RANGES-NEXT: 00000030 <End of list> diff --git a/test/DebugInfo/linkage-name.ll b/test/DebugInfo/linkage-name.ll new file mode 100644 index 0000000000..b98492383a --- /dev/null +++ b/test/DebugInfo/linkage-name.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=x86_64-macosx -darwin-gdb-compat=Disable %s -o %t -filetype=obj +; RUN: llvm-dwarfdump %t | FileCheck %s + +; CHECK: DW_TAG_subprogram [9] * +; CHECK-NOT: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_specification + +%class.A = type { i8 } + +@a = global %class.A zeroinitializer, align 1 + +define i32 @_ZN1A1aEi(%class.A* %this, i32 %b) nounwind uwtable ssp align 2 { +entry: + %this.addr = alloca %class.A*, align 8 + %b.addr = alloca i32, align 4 + store %class.A* %this, %class.A** %this.addr, align 8 + call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !21), !dbg !23 + store i32 %b, i32* %b.addr, align 4 + call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !24), !dbg !25 + %this1 = load %class.A** %this.addr + %0 = load i32* %b.addr, align 4, !dbg !26 + ret i32 %0, !dbg !26 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !2} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!8 = metadata !{metadata !9, metadata !10, metadata !9} +!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!12 = metadata !{metadata !13} +!13 = metadata !{i32 786478, i32 0, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ] +!16 = 
metadata !{metadata !17} +!17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] +!18 = metadata !{metadata !19} +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] +!21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 5, i32 8, metadata !5, null} +!24 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] +!25 = metadata !{i32 5, i32 14, metadata !5, null} +!26 = metadata !{i32 6, i32 4, metadata !27, null} +!27 = metadata !{i32 786443, metadata !5, i32 5, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/ExecutionEngine/MCJIT/pr13727.ll b/test/ExecutionEngine/MCJIT/pr13727.ll new file mode 100644 index 0000000000..5fa68f9b87 --- /dev/null +++ b/test/ExecutionEngine/MCJIT/pr13727.ll @@ -0,0 +1,88 @@ +; RUN: %lli -use-mcjit -O0 -disable-lazy-compilation=false %s + +; The intention of this test is to verify that symbols mapped to COMMON in ELF +; work as expected. +; +; Compiled from this C code: +; +; int zero_int; +; double zero_double; +; int zero_arr[10]; +; +; int main() +; { +; zero_arr[zero_int + 5] = 40; +; +; if (zero_double < 1.1) +; zero_arr[zero_int + 2] = 70; +; +; for (int i = 1; i < 10; ++i) { +; zero_arr[i] = zero_arr[i - 1] + zero_arr[i]; +; } +; return zero_arr[9] == 110 ? 0 : -1; +; } + +@zero_int = common global i32 0, align 4 +@zero_arr = common global [10 x i32] zeroinitializer, align 16 +@zero_double = common global double 0.000000e+00, align 8 + +define i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @zero_int, align 4 + %add = add nsw i32 %0, 5 + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom + store i32 40, i32* %arrayidx, align 4 + %1 = load double* @zero_double, align 8 + %cmp = fcmp olt double %1, 1.100000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %2 = load i32* @zero_int, align 4 + %add1 = add nsw i32 %2, 2 + %idxprom2 = sext i32 %add1 to i64 + %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2 + store i32 70, i32* %arrayidx3, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + store i32 1, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %if.end + %3 = load i32* %i, align 4 + %cmp4 = icmp slt i32 %3, 10 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %4 = load i32* %i, align 4 + %sub = sub nsw i32 %4, 1 + %idxprom5 = sext i32 %sub to i64 + %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5 + %5 = load i32* %arrayidx6, align 4 + %6 = load i32* %i, align 4 + %idxprom7 = sext i32 %6 to i64 + %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7 + %7 = load i32* %arrayidx8, align 4 + %add9 = add nsw i32 %5, %7 + %8 = load i32* %i, align 4 + %idxprom10 = sext i32 %8 to i64 + %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10 + store i32 %add9, i32* %arrayidx11, align 4 + br label %for.inc + +for.inc: ; preds = 
%for.body
+ %9 = load i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+ %cmp12 = icmp eq i32 %10, 110
+ %cond = select i1 %cmp12, i32 0, i32 -1
+ ret i32 %cond
+}
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
index 5ba7d618bf..00b8526468 100644
--- a/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt
@@ -1,5 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknwon -mcpu=cortex-a8 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
+# RUN: llvm-mc --disassemble %s -triple=armv7-unknown-unknown -mcpu=cortex-a8 2>&1 | FileCheck %s
# Opcode=737 Name=VLD1DUPq8_UPD Format=ARM_FORMAT_NLdSt(30)
# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
@@ -9,3 +8,4 @@
#
# 'a' == 1 and data_size == 8 is invalid
0x3d 0x3c 0xa0 0xf4
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt
new file mode 100644
index 0000000000..9bb0995ece
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLD1LNd32_UPD-thumb.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
+
+0xa0 0xf9 0x10 0x08
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt
new file mode 100644
index 0000000000..84c98bfbca
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLD4DUPd32_UPD-thumb.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
+
+0xa0 0xf9 0xc0 0x0f
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt
new file mode 100644
index 0000000000..9024b09531
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VLD4LNd32_UPD-thumb.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
+
+0xa0 0xf9 0x30 0x0b
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt
new file mode 100644
index 0000000000..9462812f26
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VST1LNd32_UPD-thumb.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
+
+0x80 0xf9 0x10 0x08
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt
new file mode 100644
index 0000000000..f6e71bcfd6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VST4LNd32_UPD-thumb.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s 2>&1 | FileCheck %s
+
+0x80 0xf9 0x30 0x0b
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
new file mode 100644
index 0000000000..e53739e739
--- /dev/null
+++ b/test/MC/Disassembler/ARM/neont-VLD-reencoding.txt
@@ -0,0 +1,77 @@
+# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | 
FileCheck %s + +0xa0 0xf9 0x00 0x00 +0xa0 0xf9 0x20 0x00 +0xa0 0xf9 0x40 0x00 +0xa0 0xf9 0x60 0x00 +0xa0 0xf9 0x80 0x00 +0xa0 0xf9 0xa0 0x00 +0xa0 0xf9 0xc0 0x00 +0xa0 0xf9 0xe0 0x00 + +# CHECK: vld1.8 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x00] +# CHECK: vld1.8 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x00] +# CHECK: vld1.8 {d0[2]}, [r0], r0 @ encoding: [0xa0,0xf9,0x40,0x00] +# CHECK: vld1.8 {d0[3]}, [r0], r0 @ encoding: [0xa0,0xf9,0x60,0x00] +# CHECK: vld1.8 {d0[4]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x00] +# CHECK: vld1.8 {d0[5]}, [r0], r0 @ encoding: [0xa0,0xf9,0xa0,0x00] +# CHECK: vld1.8 {d0[6]}, [r0], r0 @ encoding: [0xa0,0xf9,0xc0,0x00] +# CHECK: vld1.8 {d0[7]}, [r0], r0 @ encoding: [0xa0,0xf9,0xe0,0x00] + +0xa0 0xf9 0x00 0x04 +0xa0 0xf9 0x10 0x04 +0xa0 0xf9 0x40 0x04 +0xa0 0xf9 0x50 0x04 +0xa0 0xf9 0x80 0x04 +0xa0 0xf9 0x90 0x04 +0xa0 0xf9 0xc0 0x04 +0xa0 0xf9 0xd0 0x04 + +# CHECK: vld1.16 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x04] +# CHECK: vld1.16 {d0[0]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x10,0x04] +# CHECK: vld1.16 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x40,0x04] +# CHECK: vld1.16 {d0[1]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x50,0x04] +# CHECK: vld1.16 {d0[2]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x04] +# CHECK: vld1.16 {d0[2]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0x90,0x04] +# CHECK: vld1.16 {d0[3]}, [r0], r0 @ encoding: [0xa0,0xf9,0xc0,0x04] +# CHECK: vld1.16 {d0[3]}, [r0, :16], r0 @ encoding: [0xa0,0xf9,0xd0,0x04] + +0xa0 0xf9 0x00 0x08 +0xa0 0xf9 0x30 0x08 +0xa0 0xf9 0x80 0x08 +0xa0 0xf9 0xb0 0x08 + +# CHECK: vld1.32 {d0[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x00,0x08] +# CHECK: vld1.32 {d0[0]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0x30,0x08] +# CHECK: vld1.32 {d0[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x80,0x08] +# CHECK: vld1.32 {d0[1]}, [r0, :32], r0 @ encoding: [0xa0,0xf9,0xb0,0x08] + +0xa0 0xf9 0x1f 0x04 +0xa0 0xf9 0x8f 0x00 + +# CHECK: vld1.16 {d0[0]}, [r0, :16] @ encoding: [0xa0,0xf9,0x1f,0x04] +# CHECK: vld1.8 {d0[4]}, [r0] @ encoding: [0xa0,0xf9,0x8f,0x00] + +0xa0 0xf9 0x1d 0x04 +0xa0 0xf9 0x8d 0x00 + +# CHECK: vld1.16 {d0[0]}, [r0, :16]! @ encoding: [0xa0,0xf9,0x1d,0x04] +# CHECK: vld1.8 {d0[4]}, [r0]! 
@ encoding: [0xa0,0xf9,0x8d,0x00] + +0xa5 0xf9 0x10 0x04 +0xa5 0xf9 0x1a 0x04 +0xae 0xf9 0x1a 0x04 +0xa5 0xf9 0x1a 0x94 + +# CHECK: vld1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0xa5,0xf9,0x10,0x04] +# CHECK: vld1.16 {d0[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x04] +# CHECK: vld1.16 {d0[0]}, [lr, :16], r10 @ encoding: [0xae,0xf9,0x1a,0x04] +# CHECK: vld1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0xa5,0xf9,0x1a,0x94] + +0xa0 0xf9 0x20 0x0b +0xa0 0xf9 0x20 0x07 +0xa0 0xf9 0x20 0x03 + +# CHECK: vld4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0, :128], r0 @ encoding: [0xa0,0xf9,0x20,0x0b] +# CHECK: vld4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x07] +# CHECK: vld4.8 {d0[1], d1[1], d2[1], d3[1]}, [r0], r0 @ encoding: [0xa0,0xf9,0x20,0x03] diff --git a/test/MC/Disassembler/ARM/neont-VST-reencoding.txt b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt new file mode 100644 index 0000000000..eb3722c085 --- /dev/null +++ b/test/MC/Disassembler/ARM/neont-VST-reencoding.txt @@ -0,0 +1,77 @@ +# RUN: llvm-mc -triple thumbv7 -show-encoding -disassemble < %s | FileCheck %s + +0x80 0xf9 0x00 0x00 +0x81 0xf9 0x21 0x10 +0x81 0xf9 0x42 0x10 +0x81 0xf9 0x61 0x20 +0x82 0xf9 0x82 0x20 +0x82 0xf9 0xa1 0x10 +0x82 0xf9 0xc2 0x20 +0x83 0xf9 0xe3 0x30 + +# CHECK: vst1.8 {d0[0]}, [r0], r0 @ encoding: [0x80,0xf9,0x00,0x00] +# CHECK: vst1.8 {d1[1]}, [r1], r1 @ encoding: [0x81,0xf9,0x21,0x10] +# CHECK: vst1.8 {d1[2]}, [r1], r2 @ encoding: [0x81,0xf9,0x42,0x10] +# CHECK: vst1.8 {d2[3]}, [r1], r1 @ encoding: [0x81,0xf9,0x61,0x20] +# CHECK: vst1.8 {d2[4]}, [r2], r2 @ encoding: [0x82,0xf9,0x82,0x20] +# CHECK: vst1.8 {d1[5]}, [r2], r1 @ encoding: [0x82,0xf9,0xa1,0x10] +# CHECK: vst1.8 {d2[6]}, [r2], r2 @ encoding: [0x82,0xf9,0xc2,0x20] +# CHECK: vst1.8 {d3[7]}, [r3], r3 @ encoding: [0x83,0xf9,0xe3,0x30] + +0x80 0xf9 0x00 0x04 +0xc3 0xf9 0x13 0x04 +0xc4 0xf9 0x43 0x04 +0xc5 0xf9 0x55 0x04 +0xc6 0xf9 0x85 0x04 +0xc7 0xf9 0x95 0x74 +0xc8 0xf9 0xc7 0x84 +0xc9 0xf9 0xd9 0x94 + +# CHECK: vst1.16 {d0[0]}, [r0], r0 @ encoding: [0x80,0xf9,0x00,0x04] +# CHECK: vst1.16 {d16[0]}, [r3, :16], r3 @ encoding: [0xc3,0xf9,0x13,0x04] +# CHECK: vst1.16 {d16[1]}, [r4], r3 @ encoding: [0xc4,0xf9,0x43,0x04] +# CHECK: vst1.16 {d16[1]}, [r5, :16], r5 @ encoding: [0xc5,0xf9,0x55,0x04] +# CHECK: vst1.16 {d16[2]}, [r6], r5 @ encoding: [0xc6,0xf9,0x85,0x04] +# CHECK: vst1.16 {d23[2]}, [r7, :16], r5 @ encoding: [0xc7,0xf9,0x95,0x74] +# CHECK: vst1.16 {d24[3]}, [r8], r7 @ encoding: [0xc8,0xf9,0xc7,0x84] +# CHECK: vst1.16 {d25[3]}, [r9, :16], r9 @ encoding: [0xc9,0xf9,0xd9,0x94] + +0x8a 0xf9 0x01 0xa8 +0xcb 0xf9 0x32 0x18 +0x8c 0xf9 0x83 0xb8 +0xcd 0xf9 0xb4 0x28 + +# CHECK: vst1.32 {d10[0]}, [r10], r1 @ encoding: [0x8a,0xf9,0x01,0xa8] +# CHECK: vst1.32 {d17[0]}, [r11, :32], r2 @ encoding: [0xcb,0xf9,0x32,0x18] +# CHECK: vst1.32 {d11[1]}, [r12], r3 @ encoding: [0x8c,0xf9,0x83,0xb8] +# CHECK: vst1.32 {d18[1]}, [sp, :32], r4 @ encoding: [0xcd,0xf9,0xb4,0x28] + +0x81 0xf9 0x1f 0x44 +0x82 0xf9 0x8f 0x30 + +# CHECK: vst1.16 {d4[0]}, [r1, :16] @ encoding: [0x81,0xf9,0x1f,0x44] +# CHECK: vst1.8 {d3[4]}, [r2] @ encoding: [0x82,0xf9,0x8f,0x30] + +0x83 0xf9 0x1d 0x24 +0x84 0xf9 0x8d 0x10 + +# CHECK: vst1.16 {d2[0]}, [r3, :16]! @ encoding: [0x83,0xf9,0x1d,0x24] +# CHECK: vst1.8 {d1[4]}, [r4]! 
@ encoding: [0x84,0xf9,0x8d,0x10]
+
+0x85 0xf9 0x10 0x04
+0x85 0xf9 0x1a 0x74
+0x8e 0xf9 0x1a 0x84
+0x85 0xf9 0x1a 0x94
+
+# CHECK: vst1.16 {d0[0]}, [r5, :16], r0 @ encoding: [0x85,0xf9,0x10,0x04]
+# CHECK: vst1.16 {d7[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x74]
+# CHECK: vst1.16 {d8[0]}, [lr, :16], r10 @ encoding: [0x8e,0xf9,0x1a,0x84]
+# CHECK: vst1.16 {d9[0]}, [r5, :16], r10 @ encoding: [0x85,0xf9,0x1a,0x94]
+
+0x81 0xf9 0x24 0x0b
+0x82 0xf9 0x25 0x07
+0x83 0xf9 0x26 0x03
+
+# CHECK: vst4.32 {d0[0], d1[0], d2[0], d3[0]}, [r1, :128], r4 @ encoding: [0x81,0xf9,0x24,0x0b]
+# CHECK: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r2], r5 @ encoding: [0x82,0xf9,0x25,0x07]
+# CHECK: vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r3], r6 @ encoding: [0x83,0xf9,0x26,0x03]
diff --git a/test/MC/ELF/cfi-reg.s b/test/MC/ELF/cfi-reg.s
new file mode 100644
index 0000000000..fd68d6d5ad
--- /dev/null
+++ b/test/MC/ELF/cfi-reg.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s
+// PR13754
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_offset 6, -16
+ nop
+ .cfi_offset %rsi, -16
+ nop
+ .cfi_offset rbx, -16
+ nop
+ .cfi_endproc
+
+// CHECK: f:
+// CHECK: .cfi_offset %rbp, -16
+// CHECK: .cfi_offset %rsi, -16
+// CHECK: .cfi_offset %rbx, -16
diff --git a/test/MC/Mips/do_switch.ll b/test/MC/Mips/do_switch.ll
new file mode 100644
index 0000000000..7eda1b41d1
--- /dev/null
+++ b/test/MC/Mips/do_switch.ll
@@ -0,0 +1,39 @@
+; This test case will cause an internal EK_GPRel64BlockAddress to be
+; produced, which was not handled for direct object emission and caused
+; an assertion to occur. This is a variation on test case test/CodeGen/Mips/do_switch.ll

+; RUN: llc < %s -filetype=obj -march=mips -relocation-model=static

+; RUN: llc < %s -filetype=obj -march=mips -relocation-model=pic

+; RUN: llc < %s -filetype=obj -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64

+define i32 @main() nounwind readnone {
+entry:
+ %x = alloca i32, align 4 ; <i32*> [#uses=2]
+ store volatile i32 2, i32* %x, align 4
+ %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1]
+
+ switch i32 %0, label %bb4 [
+ i32 0, label %bb5
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ ]
+
+bb1: ; preds = %entry
+ ret i32 2
+
+bb2: ; preds = %entry
+ ret i32 0
+
+bb3: ; preds = %entry
+ ret i32 3
+
+bb4: ; preds = %entry
+ ret i32 4
+
+bb5: ; preds = %entry
+ ret i32 1
+}
+
diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll
index 23ec53a2e2..ae6de78d65 100644
--- a/test/MC/Mips/elf-N64.ll
+++ b/test/MC/Mips/elf-N64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - | elf-dump --dump-section-data | FileCheck %s
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | elf-dump --dump-section-data | FileCheck %s
 ; Check for N64 relocation production.
 ;
diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll
index 81a89e3040..0c66522033 100644
--- a/test/MC/Mips/higher_highest.ll
+++ b/test/MC/Mips/higher_highest.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s
-
+; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s
+; RUN: false
+; XFAIL: *
+; Disabled because currently we don't have a way to generate these relocations.
+;
; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created.
; CHECK: ('r_type', 0x1d) diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s new file mode 100644 index 0000000000..2997782cd0 --- /dev/null +++ b/test/MC/Mips/mips-alu-instructions.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for arithmetic and logical instructions. +# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Logical instructions +#------------------------------------------------------------------------------ +# CHECK: and $9, $6, $7 # encoding: [0x24,0x48,0xc7,0x00] +# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30] +# CHECK: andi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x30] +# CHECK: clo $6, $7 # encoding: [0x21,0x30,0xe6,0x70] +# CHECK: clz $6, $7 # encoding: [0x20,0x30,0xe6,0x70] +# CHECK: ins $19, $9, 6, 7 # encoding: [0x84,0x61,0x33,0x7d] +# CHECK: nor $9, $6, $7 # encoding: [0x27,0x48,0xc7,0x00] +# CHECK: or $3, $3, $5 # encoding: [0x25,0x18,0x65,0x00] +# CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x34] +# CHECK: rotr $9, $6, 7 # encoding: [0xc2,0x49,0x26,0x00] +# CHECK: rotrv $9, $6, $7 # encoding: [0x46,0x48,0xe6,0x00] +# CHECK: sll $4, $3, 7 # encoding: [0xc0,0x21,0x03,0x00] +# CHECK: sllv $2, $3, $5 # encoding: [0x04,0x10,0xa3,0x00] +# CHECK: slt $3, $3, $5 # encoding: [0x2a,0x18,0x65,0x00] +# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28] +# CHECK: slti $3, $3, 103 # encoding: [0x67,0x00,0x63,0x28] +# CHECK: sltiu $3, $3, 103 # encoding: [0x67,0x00,0x63,0x2c] +# CHECK: sltu $3, $3, $5 # encoding: [0x2b,0x18,0x65,0x00] +# CHECK: sra $4, $3, 7 # encoding: [0xc3,0x21,0x03,0x00] +# CHECK: srav $2, $3, $5 # encoding: [0x07,0x10,0xa3,0x00] +# CHECK: srl $4, $3, 7 # encoding: [0xc2,0x21,0x03,0x00] +# CHECK: srlv $2, $3, $5 # encoding: [0x06,0x10,0xa3,0x00] +# CHECK: xor $3, $3, $5 # encoding: [0x26,0x18,0x65,0x00] +# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38] +# CHECK: xori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x38] +# CHECK: wsbh $6, $7 # encoding: [0xa0,0x30,0x07,0x7c] +# CHECK: nor $7, $8, $zero # encoding: [0x27,0x38,0x00,0x01] + and $9, $6, $7 + and $9, $6, 17767 + andi $9, $6, 17767 + clo $6, $7 + clz $6, $7 + ins $19, $9, 6,7 + nor $9, $6, $7 + or $3, $3, $5 + ori $9, $6, 17767 + rotr $9, $6, 7 + rotrv $9, $6, $7 + sll $4, $3, 7 + sllv $2, $3, $5 + slt $3, $3, $5 + slt $3, $3, 103 + slti $3, $3, 103 + sltiu $3, $3, 103 + sltu $3, $3, $5 + sra $4, $3, 7 + srav $2, $3, $5 + srl $4, $3, 7 + srlv $2, $3, $5 + xor $3, $3, $5 + xor $9, $6, 17767 + xori $9, $6, 17767 + wsbh $6, $7 + not $7 ,$8 + +#------------------------------------------------------------------------------ +# Arithmetic instructions +#------------------------------------------------------------------------------ + +# CHECK: add $9, $6, $7 # encoding: [0x20,0x48,0xc7,0x00] +# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x20] +# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x24] +# CHECK: addi $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x20] +# CHECK: addiu $9, $6, -15001 # encoding: [0x67,0xc5,0xc9,0x24] +# CHECK: addu $9, $6, $7 # encoding: [0x21,0x48,0xc7,0x00] +# CHECK: madd $6, $7 # encoding: [0x00,0x00,0xc7,0x70] +# CHECK: maddu $6, $7 # encoding: [0x01,0x00,0xc7,0x70] +# CHECK: msub $6, $7 # encoding: [0x04,0x00,0xc7,0x70] +# CHECK: msubu $6, $7 # encoding: 
[0x05,0x00,0xc7,0x70]
+# CHECK: mult $3, $5 # encoding: [0x18,0x00,0x65,0x00]
+# CHECK: multu $3, $5 # encoding: [0x19,0x00,0x65,0x00]
+# CHECK: sub $9, $6, $7 # encoding: [0x22,0x48,0xc7,0x00]
+# CHECK: subu $4, $3, $5 # encoding: [0x23,0x20,0x65,0x00]
+# CHECK: sub $6, $zero, $7 # encoding: [0x22,0x30,0x07,0x00]
+# CHECK: subu $6, $zero, $7 # encoding: [0x23,0x30,0x07,0x00]
+# CHECK: add $7, $8, $zero # encoding: [0x20,0x38,0x00,0x01]
+ add $9,$6,$7
+ add $9,$6,17767
+ addu $9,$6,-15001
+ addi $9,$6,17767
+ addiu $9,$6,-15001
+ addu $9,$6,$7
+ madd $6,$7
+ maddu $6,$7
+ msub $6,$7
+ msubu $6,$7
+ mult $3,$5
+ multu $3,$5
+ sub $9,$6,$7
+ subu $4,$3,$5
+ neg $6,$7
+ negu $6,$7
+ move $7,$8
diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s
new file mode 100644
index 0000000000..ce8024d45b
--- /dev/null
+++ b/test/MC/Mips/mips-fpu-instructions.s
@@ -0,0 +1,162 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for FPU instructions.
+# CHECK: .section __TEXT,__text,regular,pure_instructions
+#------------------------------------------------------------------------------
+# FP arithmetic instructions
+#------------------------------------------------------------------------------

+# CHECK: abs.d $f12, $f14 # encoding: [0x05,0x73,0x20,0x46]
+# CHECK: abs.s $f6, $f7 # encoding: [0x85,0x39,0x00,0x46]
+# CHECK: add.d $f8, $f12, $f14 # encoding: [0x00,0x62,0x2e,0x46]
+# CHECK: add.s $f9, $f6, $f7 # encoding: [0x40,0x32,0x07,0x46]
+# CHECK: floor.w.d $f12, $f14 # encoding: [0x0f,0x73,0x20,0x46]
+# CHECK: floor.w.s $f6, $f7 # encoding: [0x8f,0x39,0x00,0x46]
+# CHECK: ceil.w.d $f12, $f14 # encoding: [0x0e,0x73,0x20,0x46]
+# CHECK: ceil.w.s $f6, $f7 # encoding: [0x8e,0x39,0x00,0x46]
+# CHECK: mul.d $f8, $f12, $f14 # encoding: [0x02,0x62,0x2e,0x46]
+# CHECK: mul.s $f9, $f6, $f7 # encoding: [0x42,0x32,0x07,0x46]
+# CHECK: neg.d $f12, $f14 # encoding: [0x07,0x73,0x20,0x46]
+# CHECK: neg.s $f6, $f7 # encoding: [0x87,0x39,0x00,0x46]
+# CHECK: round.w.d $f12, $f14 # encoding: [0x0c,0x73,0x20,0x46]
+# CHECK: round.w.s $f6, $f7 # encoding: [0x8c,0x39,0x00,0x46]
+# CHECK: sqrt.d $f12, $f14 # encoding: [0x04,0x73,0x20,0x46]
+# CHECK: sqrt.s $f6, $f7 # encoding: [0x84,0x39,0x00,0x46]
+# CHECK: sub.d $f8, $f12, $f14 # encoding: [0x01,0x62,0x2e,0x46]
+# CHECK: sub.s $f9, $f6, $f7 # encoding: [0x41,0x32,0x07,0x46]
+# CHECK: trunc.w.d $f12, $f14 # encoding: [0x0d,0x73,0x20,0x46]
+# CHECK: trunc.w.s $f6, $f7 # encoding: [0x8d,0x39,0x00,0x46]

+ abs.d $f12,$f14
+ abs.s $f6,$f7
+ add.d $f8,$f12,$f14
+ add.s $f9,$f6,$f7
+ floor.w.d $f12,$f14
+ floor.w.s $f6,$f7
+ ceil.w.d $f12,$f14
+ ceil.w.s $f6,$f7
+ mul.d $f8,$f12,$f14
+ mul.s $f9,$f6, $f7
+ neg.d $f12,$f14
+ neg.s $f6,$f7
+ round.w.d $f12,$f14
+ round.w.s $f6,$f7
+ sqrt.d $f12,$f14
+ sqrt.s $f6,$f7
+ sub.d $f8,$f12,$f14
+ sub.s $f9,$f6,$f7
+ trunc.w.d $f12,$f14
+ trunc.w.s $f6,$f7

+#------------------------------------------------------------------------------
+# FP compare instructions
+#------------------------------------------------------------------------------

+# CHECK: c.eq.d $f12, $f14 # encoding: [0x32,0x60,0x2e,0x46]
+# CHECK: c.eq.s $f6, $f7 # encoding: [0x32,0x30,0x07,0x46]
+# CHECK: c.f.d $f12, $f14 # encoding: [0x30,0x60,0x2e,0x46]
+# CHECK: c.f.s $f6, $f7 # encoding: [0x30,0x30,0x07,0x46]
+# CHECK: c.le.d $f12, $f14 # encoding: [0x3e,0x60,0x2e,0x46]
+# CHECK: c.le.s $f6, $f7 # encoding: 
[0x3e,0x30,0x07,0x46] +# CHECK: c.lt.d $f12, $f14 # encoding: [0x3c,0x60,0x2e,0x46] +# CHECK: c.lt.s $f6, $f7 # encoding: [0x3c,0x30,0x07,0x46] +# CHECK: c.nge.d $f12, $f14 # encoding: [0x3d,0x60,0x2e,0x46] +# CHECK: c.nge.s $f6, $f7 # encoding: [0x3d,0x30,0x07,0x46] +# CHECK: c.ngl.d $f12, $f14 # encoding: [0x3b,0x60,0x2e,0x46] +# CHECK: c.ngl.s $f6, $f7 # encoding: [0x3b,0x30,0x07,0x46] +# CHECK: c.ngle.d $f12, $f14 # encoding: [0x39,0x60,0x2e,0x46] +# CHECK: c.ngle.s $f6, $f7 # encoding: [0x39,0x30,0x07,0x46] +# CHECK: c.ngt.d $f12, $f14 # encoding: [0x3f,0x60,0x2e,0x46] +# CHECK: c.ngt.s $f6, $f7 # encoding: [0x3f,0x30,0x07,0x46] +# CHECK: c.ole.d $f12, $f14 # encoding: [0x36,0x60,0x2e,0x46] +# CHECK: c.ole.s $f6, $f7 # encoding: [0x36,0x30,0x07,0x46] +# CHECK: c.olt.d $f12, $f14 # encoding: [0x34,0x60,0x2e,0x46] +# CHECK: c.olt.s $f6, $f7 # encoding: [0x34,0x30,0x07,0x46] +# CHECK: c.seq.d $f12, $f14 # encoding: [0x3a,0x60,0x2e,0x46] +# CHECK: c.seq.s $f6, $f7 # encoding: [0x3a,0x30,0x07,0x46] +# CHECK: c.sf.d $f12, $f14 # encoding: [0x38,0x60,0x2e,0x46] +# CHECK: c.sf.s $f6, $f7 # encoding: [0x38,0x30,0x07,0x46] +# CHECK: c.ueq.d $f12, $f14 # encoding: [0x33,0x60,0x2e,0x46] +# CHECK: c.ueq.s $f28, $f18 # encoding: [0x33,0xe0,0x12,0x46] +# CHECK: c.ule.d $f12, $f14 # encoding: [0x37,0x60,0x2e,0x46] +# CHECK: c.ule.s $f6, $f7 # encoding: [0x37,0x30,0x07,0x46] +# CHECK: c.ult.d $f12, $f14 # encoding: [0x35,0x60,0x2e,0x46] +# CHECK: c.ult.s $f6, $f7 # encoding: [0x35,0x30,0x07,0x46] +# CHECK: c.un.d $f12, $f14 # encoding: [0x31,0x60,0x2e,0x46] +# CHECK: c.un.s $f6, $f7 # encoding: [0x31,0x30,0x07,0x46] + + c.eq.d $f12,$f14 + c.eq.s $f6,$f7 + c.f.d $f12,$f14 + c.f.s $f6,$f7 + c.le.d $f12,$f14 + c.le.s $f6,$f7 + c.lt.d $f12,$f14 + c.lt.s $f6,$f7 + c.nge.d $f12,$f14 + c.nge.s $f6,$f7 + c.ngl.d $f12,$f14 + c.ngl.s $f6,$f7 + c.ngle.d $f12,$f14 + c.ngle.s $f6,$f7 + c.ngt.d $f12,$f14 + c.ngt.s $f6,$f7 + c.ole.d $f12,$f14 + c.ole.s $f6,$f7 + c.olt.d $f12,$f14 + c.olt.s $f6,$f7 + c.seq.d $f12,$f14 + c.seq.s $f6,$f7 + c.sf.d $f12,$f14 + c.sf.s $f6,$f7 + c.ueq.d $f12,$f14 + c.ueq.s $f28,$f18 + c.ule.d $f12,$f14 + c.ule.s $f6,$f7 + c.ult.d $f12,$f14 + c.ult.s $f6,$f7 + c.un.d $f12,$f14 + c.un.s $f6,$f7 + +#------------------------------------------------------------------------------ +# FP convert instructions +#------------------------------------------------------------------------------ +# CHECK: cvt.d.s $f6, $f7 # encoding: [0xa1,0x39,0x00,0x46] +# CHECK: cvt.d.w $f12, $f14 # encoding: [0x21,0x73,0x80,0x46] +# CHECK: cvt.s.d $f12, $f14 # encoding: [0x20,0x73,0x20,0x46] +# CHECK: cvt.s.w $f6, $f7 # encoding: [0xa0,0x39,0x80,0x46] +# CHECK: cvt.w.d $f12, $f14 # encoding: [0x24,0x73,0x20,0x46] +# CHECK: cvt.w.s $f6, $f7 # encoding: [0xa4,0x39,0x00,0x46] + + cvt.d.s $f6,$f7 + cvt.d.w $f12,$f14 + cvt.s.d $f12,$f14 + cvt.s.w $f6,$f7 + cvt.w.d $f12,$f14 + cvt.w.s $f6,$f7 + +#------------------------------------------------------------------------------ +# FP move instructions +#------------------------------------------------------------------------------ + +# CHECK: cfc1 $6, $fcc0 # encoding: [0x00,0x00,0x46,0x44] +# CHECK: mfc1 $6, $f7 # encoding: [0x00,0x38,0x06,0x44] +# CHECK: mfhi $5 # encoding: [0x10,0x28,0x00,0x00] +# CHECK: mflo $5 # encoding: [0x12,0x28,0x00,0x00] +# CHECK: mov.d $f6, $f8 # encoding: [0x86,0x41,0x20,0x46] +# CHECK: mov.s $f6, $f7 # encoding: [0x86,0x39,0x00,0x46] +# CHECK: mtc1 $6, $f7 # encoding: [0x00,0x38,0x86,0x44] +# CHECK: mthi $7 # encoding: [0x11,0x00,0xe0,0x00] +# 
CHECK: mtlo $7 # encoding: [0x13,0x00,0xe0,0x00] +# CHECK: swc1 $f9, 9158($7) # encoding: [0xc6,0x23,0xe9,0xe4] + + cfc1 $a2,$0 + mfc1 $a2,$f7 + mfhi $a1 + mflo $a1 + mov.d $f6,$f8 + mov.s $f6,$f7 + mtc1 $a2,$f7 + mthi $a3 + mtlo $a3 + swc1 $f9,9158($a3) diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s new file mode 100644 index 0000000000..998be418d2 --- /dev/null +++ b/test/MC/Mips/mips-jump-instructions.s @@ -0,0 +1,72 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for jumps and branches. +# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Branch instructions +#------------------------------------------------------------------------------ +# CHECK: b 1332 # encoding: [0x34,0x05,0x00,0x10] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bc1f 1332 # encoding: [0x34,0x05,0x00,0x45] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bc1t 1332 # encoding: [0x34,0x05,0x01,0x45] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: beq $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x11] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgez $6, 1332 # encoding: [0x34,0x05,0xc1,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgezal $6, 1332 # encoding: [0x34,0x05,0xd1,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bgtz $6, 1332 # encoding: [0x34,0x05,0xc0,0x1c] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: blez $6, 1332 # encoding: [0x34,0x05,0xc0,0x18] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bne $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x15] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: bal 1332 # encoding: [0x34,0x05,0x00,0x04] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + b 1332 + nop + bc1f 1332 + nop + bc1t 1332 + nop + beq $9,$6,1332 + nop + bgez $6,1332 + nop + bgezal $6,1332 + nop + bgtz $6,1332 + nop + blez $6,1332 + nop + bne $9,$6,1332 + nop + bal 1332 + nop + +end_of_code: +#------------------------------------------------------------------------------ +# Jump instructions +#------------------------------------------------------------------------------ +# CHECK: j 1328 # encoding: [0x30,0x05,0x00,0x08] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jal 1328 # encoding: [0x30,0x05,0x00,0x0c] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jalr $6 # encoding: [0x09,0xf8,0xc0,0x00] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] + + + j 1328 + nop + jal 1328 + nop + jalr $6 + nop + jr $7 + nop + j $7 diff --git a/test/MC/Mips/mips-memory-instructions.s b/test/MC/Mips/mips-memory-instructions.s new file mode 100644 index 0000000000..b5f1267ef3 --- /dev/null +++ b/test/MC/Mips/mips-memory-instructions.s @@ -0,0 +1,45 @@ +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# Check that the assembler can handle the documented syntax +# for loads and stores. 
+# CHECK: .section __TEXT,__text,regular,pure_instructions +#------------------------------------------------------------------------------ +# Memory store instructions +#------------------------------------------------------------------------------ +# CHECK: sb $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa0] +# CHECK: sc $4, 16($5) # encoding: [0x10,0x00,0xa4,0xe0] +# CHECK: sh $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa4] +# CHECK: sw $4, 16($5) # encoding: [0x10,0x00,0xa4,0xac] +# CHECK: sw $7, 0($5) # encoding: [0x00,0x00,0xa7,0xac] +# CHECK: swc1 $f2, 16($5) # encoding: [0x10,0x00,0xa2,0xe4] +# CHECK: swl $4, 16($5) # encoding: [0x10,0x00,0xa4,0xa8] + sb $4, 16($5) + sc $4, 16($5) + sh $4, 16($5) + sw $4, 16($5) + sw $7, ($5) + swc1 $f2, 16($5) + swl $4, 16($5) + +#------------------------------------------------------------------------------ +# Memory load instructions +#------------------------------------------------------------------------------ + +# CHECK: lb $4, 4($5) # encoding: [0x04,0x00,0xa4,0x80] +# CHECK: lw $4, 4($5) # encoding: [0x04,0x00,0xa4,0x8c] +# CHECK: lbu $4, 4($5) # encoding: [0x04,0x00,0xa4,0x90] +# CHECK: lh $4, 4($5) # encoding: [0x04,0x00,0xa4,0x84] +# CHECK: lhu $4, 4($5) # encoding: [0x04,0x00,0xa4,0x94] +# CHECK: ll $4, 4($5) # encoding: [0x04,0x00,0xa4,0xc0] +# CHECK: lw $4, 4($5) # encoding: [0x04,0x00,0xa4,0x8c] +# CHECK: lw $7, 0($7) # encoding: [0x00,0x00,0xe7,0x8c] +# CHECK: lw $2, 16($sp) # encoding: [0x10,0x00,0xa2,0x8f] + + lb $4, 4($5) + lw $4, 4($5) + lbu $4, 4($5) + lh $4, 4($5) + lhu $4, 4($5) + ll $4, 4($5) + lw $4, 4($5) + lw $7, ($7) + lw $2, 16($sp) diff --git a/test/MC/Mips/mips64extins.ll b/test/MC/Mips/mips64extins.ll new file mode 100644 index 0000000000..ebe8f86513 --- /dev/null +++ b/test/MC/Mips/mips64extins.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -mattr=n64 %s -o - \ +; RUN: | llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 - \ +; RUN: | FileCheck %s + +define i64 @dext(i64 %i) nounwind readnone { +entry: +; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10 + %shr = lshr i64 %i, 5 + %and = and i64 %shr, 1023 + ret i64 %and +} + +define i64 @dextu(i64 %i) nounwind readnone { +entry: +; CHECK: dextu ${{[0-9]+}}, ${{[0-9]+}}, 2, 6 + %shr = lshr i64 %i, 34 + %and = and i64 %shr, 63 + ret i64 %and +} + +define i64 @dextm(i64 %i) nounwind readnone { +entry: +; CHECK: dextm ${{[0-9]+}}, ${{[0-9]+}}, 5, 2 + %shr = lshr i64 %i, 5 + %and = and i64 %shr, 17179869183 + ret i64 %and +} + +define i64 @dins(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10 + %shl2 = shl i64 %j, 8 + %and = and i64 %shl2, 261888 + %and3 = and i64 %i, -261889 + %or = or i64 %and3, %and + ret i64 %or +} + +define i64 @dinsm(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dinsm ${{[0-9]+}}, ${{[0-9]+}}, 10, 1 + %shl4 = shl i64 %j, 10 + %and = and i64 %shl4, 8796093021184 + %and5 = and i64 %i, -8796093021185 + %or = or i64 %and5, %and + ret i64 %or +} + +define i64 @dinsu(i64 %i, i64 %j) nounwind readnone { +entry: +; CHECK: dinsu ${{[0-9]+}}, ${{[0-9]+}}, 8, 13 + %shl4 = shl i64 %j, 40 + %and = and i64 %shl4, 9006099743113216 + %and5 = and i64 %i, -9006099743113217 + %or = or i64 %and5, %and + ret i64 %or +} diff --git a/test/MC/Mips/mips64shift.ll b/test/MC/Mips/mips64shift.ll index 7817b96fa5..99cac7b591 100644 --- a/test/MC/Mips/mips64shift.ll +++ b/test/MC/Mips/mips64shift.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - | llvm-objdump 
-disassemble -triple mips64el - | FileCheck %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -disable-mips-delay-filler %s -o - \
+; RUN: | llvm-objdump -disassemble -triple mips64el - | FileCheck %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - \
+; RUN: | llvm-objdump -disassemble -triple mips64el - | FileCheck %s
 define i64 @f3(i64 %a0) nounwind readnone {
 entry:
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
new file mode 100644
index 0000000000..f9d846078b
--- /dev/null
+++ b/test/MC/Mips/mips_directives.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple mips-unknown-unknown %s
+
+$BB0_2:
+ .frame $sp,0,$ra
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .set nomacro
+$JTI0_0:
+ .gpword ($BB0_2)
diff --git a/test/MC/Mips/multi-64bit-func.ll b/test/MC/Mips/multi-64bit-func.ll
index 6e0d784e07..83577aa162 100644
--- a/test/MC/Mips/multi-64bit-func.ll
+++ b/test/MC/Mips/multi-64bit-func.ll
@@ -1,8 +1,8 @@
 ; There is no real check here. If the test doesn't
 ; assert it passes.
-; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -disable-mips-delay-filler < %s
 ; Run it again without extra nop in delay slot
-; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 -enable-mips-delay-filler < %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s
 
 define i32 @bosco1(i32 %x) nounwind readnone {
 entry:
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 6a2d5bba6b..03cb62e7cb 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1164,6 +1164,10 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 // CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
 	movd %rdi,%xmm0
+// CHECK: movd %xmm0, %rax
+// CHECK: encoding: [0x66,0x48,0x0f,0x7e,0xc0]
+	movd %xmm0, %rax
+
 // CHECK: movntil %eax, (%rdi)
 // CHECK: encoding: [0x0f,0xc3,0x07]
 // CHECK: movntil
diff --git a/test/Makefile b/test/Makefile
index 9ddfabfb9a..ae7a67401f 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -131,7 +131,7 @@ lit.site.cfg: FORCE
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
-	@$(ECHOPATH) s,@OCAMLOPT@,$(OCAMLOPT) -cc \\\\\"$(CXX_FOR_OCAMLOPT)\\\\\" -I $(LibDir)/ocaml,g >> lit.tmp
+	@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
 	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
diff --git a/test/Object/Inputs/dext-test.elf-mips64r2 b/test/Object/Inputs/dext-test.elf-mips64r2
Binary files differ
new file mode 100644
index 0000000000..59dbaef69a
--- /dev/null
+++ b/test/Object/Inputs/dext-test.elf-mips64r2
diff --git a/test/Object/Inputs/relocations.elf-x86-64 b/test/Object/Inputs/relocations.elf-x86-64
Binary files differ
new file mode 100644
index 0000000000..6e340c7525
--- /dev/null
+++ b/test/Object/Inputs/relocations.elf-x86-64
diff --git a/test/Object/Mips/feature.test b/test/Object/Mips/feature.test
new file mode 100644
index 0000000000..e8da609746
--- /dev/null
+++ b/test/Object/Mips/feature.test
@@ -0,0 +1,11 @@
+RUN: llvm-objdump -disassemble -triple mips64el -mattr +mips64r2 %p/../Inputs/dext-test.elf-mips64r2 \
+RUN: | FileCheck %s
+
+CHECK: Disassembly of section .text:
+CHECK: .text:
+CHECK: 0: 08 00 e0 03 jr $ra 
+CHECK: 4: 43 49 82 7c dext $2, $4, 5, 10 +CHECK: 8: 08 00 e0 03 jr $ra +CHECK: c: 83 28 82 7c dext $2, $4, 2, 6 +CHECK: 10: 08 00 e0 03 jr $ra +CHECK: 14: 43 09 82 7c dext $2, $4, 5, 2 diff --git a/test/Object/Mips/lit.local.cfg b/test/Object/Mips/lit.local.cfg new file mode 100644 index 0000000000..1499317498 --- /dev/null +++ b/test/Object/Mips/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.test'] + +targets = set(config.root.targets_to_build.split()) +if not 'Mips' in targets: + config.unsupported = True diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test index a394a23a7e..6d35a2651d 100644 --- a/test/Object/objdump-relocations.test +++ b/test/Object/objdump-relocations.test @@ -9,6 +9,9 @@ RUN: | FileCheck %s -check-prefix ELF-x86-64 RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-hexagon \ RUN: | FileCheck %s -check-prefix ELF-hexagon +RUN: llvm-objdump -r %p/Inputs/relocations.elf-x86-64 \ +RUN: | FileCheck %s -check-prefix ELF-complex-x86-64 + COFF-i386: .text COFF-i386: IMAGE_REL_I386_DIR32 L_.str COFF-i386: IMAGE_REL_I386_REL32 _puts @@ -36,3 +39,13 @@ ELF-hexagon: R_HEX_HI16 puts ELF-hexagon: R_HEX_LO16 puts ELF-hexagon: R_HEX_B15_PCREL testf ELF-hexagon: R_HEX_B22_PCREL puts + +ELF-complex-x86-64: .text +ELF-complex-x86-64-NEXT: R_X86_64_8 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_16 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_32 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_32S .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_64 .data-4 +ELF-complex-x86-64-NEXT: R_X86_64_PC32 .data-4-P +ELF-complex-x86-64-NEXT: R_X86_64_32 .data+0 +ELF-complex-x86-64-NEXT: R_X86_64_32 .data+4 diff --git a/test/TableGen/if.td b/test/TableGen/if.td index 18de368af9..1d8d62329a 100644 --- a/test/TableGen/if.td +++ b/test/TableGen/if.td @@ -3,15 +3,59 @@ // Support for an `!if' operator as part of a `let' statement. 
// CHECK: class C -// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, !if({ C:x{2} }, 0, 1), !if({ C:x{2} }, 1, 1), !if({ C:x{2} }, 0, 0), !if({ C:x{1} }, C:y{3}, 0), !if({ C:x{1} }, C:y{2}, 1), !if({ C:x{0} }, C:y{3}, C:z), !if({ C:x{0} }, C:y{2}, C:y{2}), !if({ C:x{0} }, C:y{1}, C:y{1}), !if({ C:x{0} }, C:y{0}, C:y{0}) }; +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, !if({ C:y{3} }, 1, !if({ C:y{2} }, { C:x{0} }, !if({ C:y{1} }, { C:x{1} }, !if({ C:y{0} }, { C:x{2} }, ?)))){0}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){1}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){0}, !if({ C:x{2} }, 2, 6){2}, !if({ C:x{2} }, 2, 6){1}, !if({ C:x{2} }, 2, 6){0}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){1}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){0}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){3}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){2}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){1}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){0} }; class C<bits<3> x, bits<4> y, bit z> { bits<16> n; + let n{11} = !if(y{3}, 1, + !if(y{2}, x{0}, + !if(y{1}, x{1}, + !if(y{0}, x{2}, ?)))); + let n{10-9}= !if(x{2}, y{3-2}, + !if(x{1}, y{2-1}, + !if(x{0}, y{1-0}, ?))); let n{8-6} = !if(x{2}, 0b010, 0b110); let n{5-4} = !if(x{1}, y{3-2}, {0, 1}); let n{3-0} = !if(x{0}, y{3-0}, {z, y{2}, y{1}, y{0}}); } +def C1 : C<{1, 0, 1}, {0, 1, 0, 1}, 0>; +def C2 : C<{0, 1, 0}, {1, 0, 1, 0}, 1>; +def C3 : C<{0, 0, 0}, {1, 0, 1, 0}, 0>; +def C4 : C<{0, 0, 0}, {0, 0, 0, 0}, 0>; + +// CHECK: def C1 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 }; +// CHECK: def C2 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0 }; +// CHECK: def C3 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, ?, ?, 1, 1, 0, 0, 1, 0, 0, 1, 0 }; +// CHECK: def C4 +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, 1, 1, 0, 0, 1, 0, 0, 0, 0 }; + +class S<int s> { + bits<2> val = !if(!eq(s, 8), {0, 0}, + !if(!eq(s, 16), 0b01, + !if(!eq(s, 32), 2, + !if(!eq(s, 64), {1, 1}, ?)))); +} + +def D8 : S<8>; +def D16 : S<16>; +def D32 : S<32>; +def D64 : S<64>; +def D128: S<128>; +// CHECK: def D128 +// CHECK-NEXT: bits<2> val = { ?, ? 
};
+// CHECK: def D16
+// CHECK-NEXT: bits<2> val = { 0, 1 };
+// CHECK: def D32
+// CHECK-NEXT: bits<2> val = { 1, 0 };
+// CHECK: def D64
+// CHECK-NEXT: bits<2> val = { 1, 1 };
+// CHECK: def D8
+// CHECK-NEXT: bits<2> val = { 0, 0 };
+
 // CHECK: def One
 // CHECK-NEXT: list<int> first = [1, 2, 3];
 // CHECK-NEXT: list<int> rest = [1, 2, 3];
diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td
new file mode 100644
index 0000000000..5f3e3dabf4
--- /dev/null
+++ b/test/TableGen/list-element-bitref.td
@@ -0,0 +1,15 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class C<list<bits<8>> L> {
+  bits<2> V0 = L[0]{1-0};
+  bits<2> V1 = L[1]{3-2};
+  string V2 = !if(L[0]{0}, "Odd", "Even");
+}
+
+def c0 : C<[0b0101, 0b1010]>;
+
+// CHECK: def c0
+// CHECK-NEXT: bits<2> V0 = { 0, 1 };
+// CHECK-NEXT: bits<2> V1 = { 1, 0 };
+// CHECK-NEXT: string V2 = "Odd";
diff --git a/test/TableGen/pr8330.td b/test/TableGen/pr8330.td
new file mode 100644
index 0000000000..7779b635e3
--- /dev/null
+++ b/test/TableGen/pr8330.td
@@ -0,0 +1,29 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Or4<bits<8> Val> {
+  bits<8> V = {Val{7}, Val{6}, Val{5}, Val{4}, Val{3}, 1, Val{1}, Val{0} };
+}
+
+class Whatev<bits<8> x>;
+
+class Whatever<bits<8> x> {
+  bits<8> W = {x{0}, x{1}, x{2}, x{3}, x{4}, x{5}, x{6}, x{7} };
+}
+
+multiclass X<bits<8> BaseOpc> {
+  def bar : Whatev<Or4<BaseOpc>.V >;
+}
+
+multiclass Y<bits<8> BaseOpc> {
+  def foo : Whatever<Or4<BaseOpc>.V >;
+}
+
+defm a : X<4>;
+
+// CHECK: def abar
+
+defm b : Y<8>;
+
+// CHECK: def bfoo
+// CHECK-NEXT: bits<8> W = { 0, 0, 1, 1, 0, 0, 0, 0 };
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 7a8cdd531b..e0eb90af94 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -310,3 +310,17 @@ define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
 store i32 %c, i32* %4, align 4
 ret void
}
+
+; Check another case like PR13547 where strdup is not like malloc.
+; CHECK: @test25 +; CHECK: load i8 +; CHECK: store i8 0 +; CHECK: store i8 %tmp +define i8* @test25(i8* %p) nounwind { + %p.4 = getelementptr i8* %p, i64 4 + %tmp = load i8* %p.4, align 1 + store i8 0, i8* %p.4, align 1 + %q = call i8* @strdup(i8* %p) nounwind optsize + store i8 %tmp, i8* %p.4, align 1 + ret i8* %q +} diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll new file mode 100644 index 0000000000..66b6929d30 --- /dev/null +++ b/test/Transforms/GVN/malloc-load-removal.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -basicaa -gvn < %s | FileCheck %s +; RUN: opt -S -basicaa -gvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; PR13694 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +declare i8* @malloc(i64) nounwind + +define noalias i8* @test() nounwind uwtable ssp { +entry: + %call = tail call i8* @malloc(i64 100) nounwind + %0 = load i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK: @test +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS: @test +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} diff --git a/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll new file mode 100644 index 0000000000..4efaf8c172 --- /dev/null +++ b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll @@ -0,0 +1,57 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +; rdar://12182093 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; CHECK: @udiv400 +; CHECK: udiv i32 %x, 400 +; CHECK: ret +define i32 @udiv400(i32 %x) { +entry: + %div = lshr i32 %x, 2 + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} + + +; CHECK: @udiv400_no +; CHECK: ashr +; CHECK: div +; CHECK: ret +define i32 @udiv400_no(i32 %x) { +entry: + %div = ashr i32 %x, 2 + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} + +; CHECK: @sdiv400_yes +; CHECK: udiv i32 %x, 400 +; CHECK: ret +define i32 @sdiv400_yes(i32 %x) { +entry: + %div = lshr i32 %x, 2 + ; The sign bits of both operands are zero (i.e. we can prove they are + ; unsigned inputs), turn this into a udiv. + ; Next, optimize this just like sdiv. 
+ %div1 = sdiv i32 %div, 100 + ret i32 %div1 +} + + +; CHECK: @udiv_i80 +; CHECK: udiv i80 %x, 400 +; CHECK: ret +define i80 @udiv_i80(i80 %x) { + %div = lshr i80 %x, 2 + %div1 = udiv i80 %div, 100 + ret i80 %div1 +} + +define i32 @no_crash_notconst_udiv(i32 %x, i32 %notconst) { + %div = lshr i32 %x, %notconst + %div1 = udiv i32 %div, 100 + ret i32 %div1 +} diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll index 3f22522a6c..2cb970bf41 100644 --- a/test/Transforms/InstCombine/fold-vector-select.ll +++ b/test/Transforms/InstCombine/fold-vector-select.ll @@ -1,13 +1,148 @@ ; RUN: opt < %s -instcombine -S | not grep select -define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D) { - %r = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> zeroinitializer - %g = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 6, i32 9, i32 1> - %b = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 7, i32 1, i32 4, i32 9> - %a = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 8, i32 5> - store <4 x i32> %r, <4 x i32>* %A - store <4 x i32> %g, <4 x i32>* %B - store <4 x i32> %b, <4 x i32>* %C - store <4 x i32> %a, <4 x i32>* %D +define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D, + <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H, + <4 x i32> *%I, <4 x i32> *%J, <4 x i32> *%K, <4 x i32> *%L, + <4 x i32> *%M, <4 x i32> *%N, <4 x i32> *%O, <4 x i32> *%P, + <4 x i32> *%Q, <4 x i32> *%R, <4 x i32> *%S, <4 x i32> *%T, + <4 x i32> *%U, <4 x i32> *%V, <4 x i32> *%W, <4 x i32> *%X, + <4 x i32> *%Y, <4 x i32> *%Z, <4 x i32> *%BA, <4 x i32> *%BB, + <4 x i32> *%BC, <4 x i32> *%BD, <4 x i32> *%BE, <4 x i32> *%BF, + <4 x i32> *%BG, <4 x i32> *%BH, <4 x i32> *%BI, <4 x i32> *%BJ, + <4 x i32> *%BK, <4 x i32> *%BL, <4 x i32> *%BM, <4 x i32> *%BN, + <4 x i32> *%BO, <4 x i32> *%BP, <4 x i32> *%BQ, <4 x i32> *%BR, + <4 x i32> *%BS, <4 x i32> *%BT, <4 x i32> *%BU, <4 x i32> *%BV, + <4 x i32> *%BW, <4 x i32> *%BX, <4 x i32> *%BY, <4 x i32> *%BZ, + <4 x i32> *%CA, <4 x i32> *%CB, <4 x i32> *%CC, <4 x i32> *%CD, + <4 x i32> *%CE, <4 x i32> *%CF, <4 x i32> *%CG, <4 x i32> *%CH, + <4 x i32> *%CI, <4 x i32> *%CJ, <4 x i32> *%CK, <4 x i32> *%CL) { + %a = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 9, i32 87, i32 57, i32 8> + %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 99, i32 49, i32 29> + %c = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 18, i32 53, i32 84> + %d = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 29, i32 82, i32 45, i32 16> + %e = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 11, i32 15, i32 32, i32 99> + %f = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 19, i32 86, i32 29, i32 33> + %g = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 10, i32 26, i32 45> + %h = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 88, i32 70, i32 90, i32 48> + %i = select <4 x i1> <i1 
false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 30, i32 53, i32 42, i32 12> + %j = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 46, i32 24, i32 93, i32 26> + %k = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 33, i32 99, i32 15, i32 57> + %l = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 51, i32 60, i32 60, i32 50> + %m = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 50, i32 12, i32 7, i32 45> + %n = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 65, i32 36, i32 36> + %o = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 54, i32 0, i32 17, i32 78> + %p = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 56, i32 13, i32 64, i32 48> + %q = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 52, i32 69, i32 88, i32 11>, <4 x i32> zeroinitializer + %r = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 5, i32 87, i32 68, i32 14>, <4 x i32> zeroinitializer + %s = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 47, i32 17, i32 66, i32 63>, <4 x i32> zeroinitializer + %t = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 64, i32 25, i32 73, i32 81>, <4 x i32> zeroinitializer + %u = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 51, i32 41, i32 61, i32 63>, <4 x i32> zeroinitializer + %v = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 39, i32 59, i32 17, i32 0>, <4 x i32> zeroinitializer + %w = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 91, i32 99, i32 97, i32 29>, <4 x i32> zeroinitializer + %x = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 89, i32 45, i32 89, i32 10>, <4 x i32> zeroinitializer + %y = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 25, i32 70, i32 21, i32 27>, <4 x i32> zeroinitializer + %z = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 40, i32 12, i32 27, i32 88>, <4 x i32> zeroinitializer + %ba = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 36, i32 35, i32 90, i32 23>, <4 x i32> zeroinitializer + %bb = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 83, i32 3, i32 64, i32 82>, <4 x i32> zeroinitializer + %bc = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 15, i32 72, i32 2, i32 54>, <4 x i32> zeroinitializer + %bd = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 32, i32 47, i32 100, i32 84>, <4 x i32> zeroinitializer + %be = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 92, i32 57, i32 82, i32 1>, <4 x i32> zeroinitializer + %bf = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 42, i32 14, i32 22, i32 89>, <4 x i32> zeroinitializer + %bg = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 33, i32 10, i32 67, i32 66>, <4 x i32> <i32 42, i32 91, i32 47, i32 40> + %bh = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 8, i32 13, i32 48, i32 0>, <4 x i32> <i32 84, i32 66, i32 87, i32 84> + %bi = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 85, i32 
96, i32 1, i32 94>, <4 x i32> <i32 54, i32 57, i32 7, i32 92> + %bj = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 55, i32 21, i32 92, i32 68>, <4 x i32> <i32 51, i32 61, i32 62, i32 39> + %bk = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 42, i32 18, i32 77, i32 74>, <4 x i32> <i32 82, i32 33, i32 30, i32 7> + %bl = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 80, i32 92, i32 61, i32 84>, <4 x i32> <i32 43, i32 89, i32 92, i32 6> + %bm = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 49, i32 14, i32 62, i32 62>, <4 x i32> <i32 35, i32 33, i32 92, i32 59> + %bn = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 3, i32 97, i32 49, i32 18>, <4 x i32> <i32 56, i32 64, i32 19, i32 75> + %bo = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 91, i32 57, i32 0, i32 1>, <4 x i32> <i32 43, i32 63, i32 64, i32 11> + %bp = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 41, i32 65, i32 18, i32 11>, <4 x i32> <i32 86, i32 26, i32 31, i32 3> + %bq = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 31, i32 46, i32 32, i32 68>, <4 x i32> <i32 100, i32 59, i32 62, i32 6> + %br = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 76, i32 67, i32 87, i32 7>, <4 x i32> <i32 63, i32 48, i32 97, i32 24> + %bs = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 83, i32 89, i32 19, i32 4>, <4 x i32> <i32 21, i32 2, i32 40, i32 21> + %bt = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 45, i32 76, i32 81, i32 100>, <4 x i32> <i32 65, i32 26, i32 100, i32 46> + %bu = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 16, i32 75, i32 31, i32 17>, <4 x i32> <i32 37, i32 66, i32 86, i32 65> + %bv = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 13, i32 25, i32 43, i32 59>, <4 x i32> <i32 82, i32 78, i32 60, i32 52> + %bw = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %bx = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %by = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %bz = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ca = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cb = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cc = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cd = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ce = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cf = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cg = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ch = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ci = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cj = select 
<4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %ck = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + %cl = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer + store <4 x i32> %a, <4 x i32>* %A + store <4 x i32> %b, <4 x i32>* %B + store <4 x i32> %c, <4 x i32>* %C + store <4 x i32> %d, <4 x i32>* %D + store <4 x i32> %e, <4 x i32>* %E + store <4 x i32> %f, <4 x i32>* %F + store <4 x i32> %g, <4 x i32>* %G + store <4 x i32> %h, <4 x i32>* %H + store <4 x i32> %i, <4 x i32>* %I + store <4 x i32> %j, <4 x i32>* %J + store <4 x i32> %k, <4 x i32>* %K + store <4 x i32> %l, <4 x i32>* %L + store <4 x i32> %m, <4 x i32>* %M + store <4 x i32> %n, <4 x i32>* %N + store <4 x i32> %o, <4 x i32>* %O + store <4 x i32> %p, <4 x i32>* %P + store <4 x i32> %q, <4 x i32>* %Q + store <4 x i32> %r, <4 x i32>* %R + store <4 x i32> %s, <4 x i32>* %S + store <4 x i32> %t, <4 x i32>* %T + store <4 x i32> %u, <4 x i32>* %U + store <4 x i32> %v, <4 x i32>* %V + store <4 x i32> %w, <4 x i32>* %W + store <4 x i32> %x, <4 x i32>* %X + store <4 x i32> %y, <4 x i32>* %Y + store <4 x i32> %z, <4 x i32>* %Z + store <4 x i32> %ba, <4 x i32>* %BA + store <4 x i32> %bb, <4 x i32>* %BB + store <4 x i32> %bc, <4 x i32>* %BC + store <4 x i32> %bd, <4 x i32>* %BD + store <4 x i32> %be, <4 x i32>* %BE + store <4 x i32> %bf, <4 x i32>* %BF + store <4 x i32> %bg, <4 x i32>* %BG + store <4 x i32> %bh, <4 x i32>* %BH + store <4 x i32> %bi, <4 x i32>* %BI + store <4 x i32> %bj, <4 x i32>* %BJ + store <4 x i32> %bk, <4 x i32>* %BK + store <4 x i32> %bl, <4 x i32>* %BL + store <4 x i32> %bm, <4 x i32>* %BM + store <4 x i32> %bn, <4 x i32>* %BN + store <4 x i32> %bo, <4 x i32>* %BO + store <4 x i32> %bp, <4 x i32>* %BP + store <4 x i32> %bq, <4 x i32>* %BQ + store <4 x i32> %br, <4 x i32>* %BR + store <4 x i32> %bs, <4 x i32>* %BS + store <4 x i32> %bt, <4 x i32>* %BT + store <4 x i32> %bu, <4 x i32>* %BU + store <4 x i32> %bv, <4 x i32>* %BV + store <4 x i32> %bw, <4 x i32>* %BW + store <4 x i32> %bx, <4 x i32>* %BX + store <4 x i32> %by, <4 x i32>* %BY + store <4 x i32> %bz, <4 x i32>* %BZ + store <4 x i32> %ca, <4 x i32>* %CA + store <4 x i32> %cb, <4 x i32>* %CB + store <4 x i32> %cc, <4 x i32>* %CC + store <4 x i32> %cd, <4 x i32>* %CD + store <4 x i32> %ce, <4 x i32>* %CE + store <4 x i32> %cf, <4 x i32>* %CF + store <4 x i32> %cg, <4 x i32>* %CG + store <4 x i32> %ch, <4 x i32>* %CH + store <4 x i32> %ci, <4 x i32>* %CI + store <4 x i32> %cj, <4 x i32>* %CJ + store <4 x i32> %ck, <4 x i32>* %CK + store <4 x i32> %cl, <4 x i32>* %CL ret void } diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll index d95e8f8359..74f2fdd7cc 100644 --- a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll +++ b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll @@ -6,9 +6,9 @@ ; The udiv instructions shouldn't be optimized away, and the ; sext instructions should be optimized to zext. 
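; A short sketch of the reasoning (restating the function below, not adding
; checks): after %y = lshr i32 %x, 30, only the low two bits of %y can be
; nonzero, so %y is in [0, 3]. An unsigned divide by a nonzero %g cannot
; increase the dividend, so %r is also in [0, 3] and its sign bit is known
; zero. When the sign bit is known zero, sign- and zero-extension agree:
;
;   %z = sext i32 %r to i64    ; equivalent here to: zext i32 %r to i64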
-define i64 @bar(i32 %x) nounwind { +define i64 @bar(i32 %x, i32 %g) nounwind { %y = lshr i32 %x, 30 - %r = udiv i32 %y, 3 + %r = udiv i32 %y, %g %z = sext i32 %r to i64 ret i64 %z } diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll index 8a81857736..9676efec9d 100644 --- a/test/Transforms/JumpThreading/select.ll +++ b/test/Transforms/JumpThreading/select.ll @@ -121,3 +121,39 @@ L4: call void @quux() br label %L0 } + +; Make sure the edge value of %0 from entry to L2 includes 0 and L3 is +; reachable. +; CHECK: test_switch_default +; CHECK: entry: +; CHECK: load +; CHECK: switch +; CHECK: [[THREADED:[A-Za-z.0-9]+]]: +; CHECK: store +; CHECK: br +; CHECK: L2: +; CHECK: icmp +define void @test_switch_default(i32* nocapture %status) nounwind { +entry: + %0 = load i32* %status, align 4 + switch i32 %0, label %L2 [ + i32 5061, label %L1 + i32 0, label %L2 + ] + +L1: + store i32 10025, i32* %status, align 4 + br label %L2 + +L2: + %1 = load i32* %status, align 4 + %cmp57.i = icmp eq i32 %1, 0 + br i1 %cmp57.i, label %L3, label %L4 + +L3: + store i32 10000, i32* %status, align 4 + br label %L4 + +L4: + ret void +} diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll index 6f28d53af6..98f93345e3 100644 --- a/test/Transforms/LICM/hoisting.ll +++ b/test/Transforms/LICM/hoisting.ll @@ -29,7 +29,7 @@ Out: ; preds = %LoopTail } -declare void @foo2(i32) +declare void @foo2(i32) nounwind ;; It is ok and desirable to hoist this potentially trapping instruction. @@ -64,3 +64,29 @@ Out: ; preds = %Loop %C = sub i32 %A, %B ; <i32> [#uses=1] ret i32 %C } + +; CHECK: @test4 +; CHECK: call +; CHECK: sdiv +; CHECK: ret +define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @foo_may_call_exit(i32 0) + %div = sdiv i32 %x, %y + %add = add nsw i32 %n.01, %div + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %n.0.lcssa = phi i32 [ %add, %for.body ] + ret i32 %n.0.lcssa +} + +declare void @foo_may_call_exit(i32) + diff --git a/test/Transforms/LoopRotate/multiple-exits.ll b/test/Transforms/LoopRotate/multiple-exits.ll new file mode 100644 index 0000000000..675d71f60d --- /dev/null +++ b/test/Transforms/LoopRotate/multiple-exits.ll @@ -0,0 +1,236 @@ +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; PR7447 +define i32 @test1([100 x i32]* nocapture %a) nounwind readonly { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond1, %entry + %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ] + %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ] + br i1 %i.0, label %for.cond1, label %return + +for.cond1: ; preds = %for.cond, %land.rhs + %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ] + %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ] + %cmp2 = icmp ult i32 %i.1, 100 + br i1 %cmp2, label %land.rhs, label %for.cond + +land.rhs: ; preds = %for.cond1 + %conv = zext i32 %i.1 to i64 + %arrayidx = getelementptr inbounds [100 x i32]* %a, i64 0, i64 %conv + %0 = load i32* %arrayidx, align 4 + %add = add i32 
%0, %sum.1 + %cmp4 = icmp ugt i32 %add, 1000 + %inc = add i32 %i.1, 1 + br i1 %cmp4, label %return, label %for.cond1 + +return: ; preds = %for.cond, %land.rhs + %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ] + ret i32 %retval.0 + +; CHECK: @test1 +; CHECK: for.cond1.preheader: +; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ] +; CHECK: br label %for.cond1 + +; CHECK: for.cond1: +; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ] +; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ] +; CHECK: %cmp2 = icmp ult i32 %i.1, 100 +; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit +} + +define void @test2(i32 %x) nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] + %cmp = icmp eq i32 %i.0, %x + br i1 %cmp, label %return.loopexit, label %for.body + +for.body: ; preds = %for.cond + %call = tail call i32 @foo(i32 %i.0) nounwind + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.end, label %a + +if.end: ; preds = %for.body + %call1 = tail call i32 @foo(i32 42) nounwind + %inc = add i32 %i.0, 1 + br label %for.cond + +a: ; preds = %for.body + %call2 = tail call i32 @bar(i32 1) nounwind + br label %return + +return.loopexit: ; preds = %for.cond + br label %return + +return: ; preds = %return.loopexit, %a + ret void + +; CHECK: @test2 +; CHECK: if.end: +; CHECK: %inc = add i32 %i.02, 1 +; CHECK: %cmp = icmp eq i32 %inc, %x +; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body +} + +declare i32 @foo(i32) + +declare i32 @bar(i32) + +@_ZTIi = external constant i8* + +; Verify dominators. +define void @test3(i32 %x) { +entry: + %cmp2 = icmp eq i32 0, %x + br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc + %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + invoke void @_Z3fooi(i32 %i.03) + to label %for.inc unwind label %lpad + +for.inc: ; preds = %for.body + %inc = add i32 %i.03, 1 + %cmp = icmp eq i32 %inc, %x + br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body + +lpad: ; preds = %for.body + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = extractvalue { i8*, i32 } %0, 1 + %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %2, %3 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %4 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind + br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph + +for.body.i.lr.ph: ; preds = %catch + br label %for.body.i + +for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i + %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ] + invoke void @_Z3fooi(i32 %i.0.i1) + to label %for.inc.i unwind label %lpad.i + +for.inc.i: ; preds = %for.body.i + %inc.i = add i32 %i.0.i1, 1 + %cmp.i = icmp eq i32 %inc.i, 0 + br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i + +lpad.i: ; preds = %for.body.i + %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %6 = extractvalue { i8*, i32 } %5, 0 + %7 = extractvalue { i8*, i32 } %5, 1 + %matches.i = icmp eq i32 %7, %3 + br i1 
%matches.i, label %catch.i, label %lpad1.body + +catch.i: ; preds = %lpad.i + %8 = tail call i8* @__cxa_begin_catch(i8* %6) nounwind + invoke void @test3(i32 0) + to label %invoke.cont2.i unwind label %lpad1.i + +invoke.cont2.i: ; preds = %catch.i + tail call void @__cxa_end_catch() nounwind + br label %invoke.cont2 + +lpad1.i: ; preds = %catch.i + %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + cleanup + %10 = extractvalue { i8*, i32 } %9, 0 + %11 = extractvalue { i8*, i32 } %9, 1 + tail call void @__cxa_end_catch() nounwind + br label %lpad1.body + +for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i + br label %invoke.cont2.loopexit + +invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch + br label %invoke.cont2 + +invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i + tail call void @__cxa_end_catch() nounwind + br label %try.cont + +for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc + br label %try.cont.loopexit + +try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry + br label %try.cont + +try.cont: ; preds = %try.cont.loopexit, %invoke.cont2 + ret void + +lpad1.body: ; preds = %lpad1.i, %lpad.i + %exn.slot.0.i = phi i8* [ %10, %lpad1.i ], [ %6, %lpad.i ] + %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ] + tail call void @__cxa_end_catch() nounwind + br label %eh.resume + +eh.resume: ; preds = %lpad1.body, %lpad + %exn.slot.0 = phi i8* [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ] + %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ] + %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0 + %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1 + resume { i8*, i32 } %lpad.val5 +} + +declare void @_Z3fooi(i32) + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +define void @test4() nounwind uwtable { +entry: + br label %"7" + +"3": ; preds = %"7" + br i1 undef, label %"31", label %"4" + +"4": ; preds = %"3" + %. = select i1 undef, float 0x3F50624DE0000000, float undef + %0 = add i32 %1, 1 + br label %"7" + +"7": ; preds = %"4", %entry + %1 = phi i32 [ %0, %"4" ], [ 0, %entry ] + %2 = icmp slt i32 %1, 100 + br i1 %2, label %"3", label %"8" + +"8": ; preds = %"7" + br i1 undef, label %"9", label %"31" + +"9": ; preds = %"8" + br label %"33" + +"27": ; preds = %"31" + unreachable + +"31": ; preds = %"8", %"3" + br i1 undef, label %"27", label %"32" + +"32": ; preds = %"31" + br label %"33" + +"33": ; preds = %"32", %"9" + ret void +} diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index 0a7ba5de71..7b64b1be7c 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt -objc-arc -S < %s | FileCheck %s +; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s target datalayout = "e-p:64:64:64" @@ -1498,7 +1498,7 @@ define i8* @test49(i8* %p) nounwind { } ; Do delete retain+release with intervening stores of the -; address value; +; address value. 
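; In outline, the pattern is (illustrative only; %slot is not a name from the
; test):
;
;   %x = call i8* @objc_retain(i8* %p)
;   store i8* %p, i8** %slot    ; stores the pointer *value* only
;   call void @objc_release(i8* %p)
;
; A store of the pointer value just copies bits; it neither reads nor writes
; the object itself, so it cannot observe the retain count, and the
; retain+release pair remains removable.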
; CHECK: define void @test50( ; CHECK-NOT: @objc_ diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll index a618a21d8b..32be03ec6a 100644 --- a/test/Transforms/ObjCARC/nested.ll +++ b/test/Transforms/ObjCARC/nested.ll @@ -16,6 +16,10 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind declare void @use(i8*) declare void @objc_release(i8*) +declare i8* @def() +declare void @__crasher_block_invoke(i8* nocapture) +declare i8* @objc_retainBlock(i8*) +declare void @__crasher_block_invoke1(i8* nocapture) !0 = metadata !{} @@ -279,11 +283,13 @@ forcoll.empty: ret void } -; Delete a nested retain+release pair. +; TODO: Delete a nested retain+release pair. +; The optimizer currently can't do this, because it isn't sophisticated enough in +; reasoning about nesting. ; CHECK: define void @test6( ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test6() nounwind { entry: @@ -345,11 +351,13 @@ forcoll.empty: ret void } -; Delete a nested retain+release pair. +; TODO: Delete a nested retain+release pair. +; The optimizer currently can't do this, because it isn't sophisticated enough in +; reasoning about nesting. ; CHECK: define void @test7( ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test7() nounwind { entry: @@ -553,12 +561,12 @@ forcoll.empty: ret void } -; Like test9, but without a split backedge. This we can optimize. +; Like test9, but without a split backedge. TODO: optimize this. ; CHECK: define void @test9b( ; CHECK: call i8* @objc_retain ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test9b() nounwind { entry: @@ -687,12 +695,12 @@ forcoll.empty: ret void } -; Like test10, but without a split backedge. This we can optimize. +; Like test10, but without a split backedge. TODO: optimize this. ; CHECK: define void @test10b( ; CHECK: call i8* @objc_retain ; CHECK: call i8* @objc_retain -; CHECK-NOT: @objc_retain +; CHECK: @objc_retain ; CHECK: } define void @test10b() nounwind { entry: @@ -751,3 +759,64 @@ forcoll.empty: call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0 ret void } + +; Pointers to strong pointers can obscure provenance relationships. Be conservative +; in the face of escaping pointers. rdar://12150909.
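; An illustrative reduction of the hazard (names here are illustrative, not
; from the test below): once a retained pointer is stored into a slot whose
; address escapes, a later load from that slot can no longer be tied to the
; original pointer's provenance, so the optimizer must not pair the retain
; with a release that follows the escape:
;
;   %r = call i8* @objc_retain(i8* %call)
;   store i8* %r, i8** %slot                     ; strong pointer now in memory
;   %b = call i8* @objc_retainBlock(i8* %blk)    ; the block may capture %slot
;   %reload = load i8** %slot                    ; provenance obscured
;   call void @objc_release(i8* %reload)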
+ +%struct.__block_d = type { i64, i64 } + +@_NSConcreteStackBlock = external global i8* +@__block_d_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } +@__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* } + +; CHECK: define void @test11( +; CHECK: tail call i8* @objc_retain(i8* %call) nounwind +; CHECK: tail call i8* @objc_retain(i8* %call) nounwind +; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0 +; CHECK: } +define void @test11() { +entry: + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8 + %block9 = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8 + %call = call i8* @def(), !clang.arc.no_objc_arc_exceptions !0 + %foo = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 5 + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 0 + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*)* @__crasher_block_invoke to i8*), i8** %block.invoke, align 8 + %block.d = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp to %struct.__block_d*), %struct.__block_d** %block.d, align 8 + %foo2 = tail call i8* @objc_retain(i8* %call) nounwind + store i8* %foo2, i8** %foo, align 8 + %foo4 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block to i8* + %foo5 = call i8* @objc_retainBlock(i8* %foo4) nounwind + call void @use(i8* %foo5), !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %foo5) nounwind + %strongdestroy = load i8** %foo, align 8 + call void @objc_release(i8* %strongdestroy) nounwind, !clang.imprecise_release !0 + %foo10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 5 + %block.isa11 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 0 + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa11, align 8 + %block.flags12 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 1 + store i32 1107296256, i32* %block.flags12, align 8 + %block.reserved13 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 2 + store i32 0, i32* %block.reserved13, align 4 + %block.invoke14 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 3 + store i8* bitcast (void (i8*)* @__crasher_block_invoke1 to i8*), i8** %block.invoke14, align 8 + %block.d15 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 4 + store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp5 to %struct.__block_d*), %struct.__block_d** %block.d15, align 8 + %foo18 = call i8* @objc_retain(i8* %call) nounwind + store i8* %call, i8** %foo10, align 8 + %foo20 = bitcast <{ i8*, i32, i32, i8*, 
%struct.__block_d*, i8* }>* %block9 to i8* + %foo21 = call i8* @objc_retainBlock(i8* %foo20) nounwind + call void @use(i8* %foo21), !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %foo21) nounwind + %strongdestroy25 = load i8** %foo10, align 8 + call void @objc_release(i8* %strongdestroy25) nounwind, !clang.imprecise_release !0 + call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0 + ret void +} diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll index 546cc75f29..b28e4a4550 100644 --- a/test/Transforms/SimplifyCFG/switch_create.ll +++ b/test/Transforms/SimplifyCFG/switch_create.ll @@ -141,8 +141,9 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ret i1 %UnifiedRetVal ; CHECK: @test6 -; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 -; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 +; CHECK: %switch.tableidx = sub i32 %tmp.2.i, 14 +; CHECK: %0 = icmp ult i32 %switch.tableidx, 6 +; CHECK: select i1 %0, i1 true, i1 false } define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll new file mode 100644 index 0000000000..414da93976 --- /dev/null +++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -0,0 +1,140 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The table for @f +; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] + +; The int table for @h +; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" + +; The float table for @h +; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] + +; The table for @foostring +; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] + +; A simple int-to-int selection switch. +; It is dense enough to be replaced by table lookup. +; The result is used directly by a ret from an otherwise empty bb, +; so we return early, directly from the lookup bb.
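; In outline (a paraphrase of the CHECK lines below, not additional checks):
; for the dense case range [42, 48] the switch collapses into one range test
; plus a table load, roughly
;
;   %switch.tableidx = sub i32 %c, 42
;   %0 = icmp ult i32 %switch.tableidx, 7
;   br i1 %0, label %switch.lookup, label %return   ; out of range -> default
;
; and %switch.lookup loads the result straight out of @switch.table.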
+ +define i32 @f(i32 %c) nounwind uwtable readnone { +entry: + switch i32 %c, label %sw.default [ + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 + i32 46, label %sw.bb4 + i32 47, label %sw.bb5 + i32 48, label %sw.bb6 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.default: br label %return +return: + %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ] + ret i32 %retval.0 + +; CHECK: @f +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i32* %switch.gep +; CHECK-NEXT: ret i32 %switch.load +; CHECK: return: +; CHECK-NEXT: ret i32 15 +} + +; A switch used to initialize two variables, an i8 and a float. + +declare void @dummy(i8 signext, float) +define void @h(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.epilog + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog + +sw.epilog: + %a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ] + %b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ] + call void @dummy(i8 signext %a.0, float %b.0) + ret void + +; CHECK: @h +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8* %switch.gep +; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.epilog: +; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] +; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] +; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) +; CHECK-NEXT: ret void +} + + +; Switch used to return a string. 
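; The same rewrite applies when the selected values are pointers: per the
; @switch.table3 CHECK line above, the table entries are constant
; getelementptr expressions into the four strings, so the lookup load in
; @foostring below yields the i8* return value directly; only the "error"
; default stays outside the table.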
+ +@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1 +@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1 +@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1 +@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1 + +define i8* @foostring(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.default: br label %return + +return: + %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ], + [ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ], + [ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ], + [ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ], + [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ] + ret i8* %retval.0 + +; CHECK: @foostring +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8** %switch.gep +; CHECK-NEXT: ret i8* %switch.load +} diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll new file mode 100644 index 0000000000..b4ab8b4ceb --- /dev/null +++ b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll @@ -0,0 +1,333 @@ +; RUN: opt < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define float @acos_test(float %f) nounwind readnone { +; CHECK: acos_test + %conv = fpext float %f to double + %call = call double @acos(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @acosf(float %f) +} + +define double @acos_test2(float %f) nounwind readnone { +; CHECK: acos_test2 + %conv = fpext float %f to double + %call = call double @acos(double %conv) + ret double %call +; CHECK: call double @acos(double %conv) +} + +define float @acosh_test(float %f) nounwind readnone { +; CHECK: acosh_test + %conv = fpext float %f to double + %call = call double @acosh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @acoshf(float %f) +} + +define double @acosh_test2(float %f) nounwind readnone { +; CHECK: acosh_test2 + %conv = fpext float %f to double + %call = call double @acosh(double %conv) + ret double %call +; CHECK: call double @acosh(double %conv) +} + +define float @asin_test(float %f) nounwind readnone { +; CHECK: asin_test + %conv = fpext float %f to double + %call = call double @asin(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @asinf(float %f) +} + +define double @asin_test2(float %f) nounwind readnone { +; CHECK: asin_test2 + %conv = fpext float %f to double + %call = call double @asin(double %conv) + ret double %call +; CHECK: call double @asin(double %conv) +} + +define float @asinh_test(float %f) nounwind readnone { +; CHECK: asinh_test + 
%conv = fpext float %f to double + %call = call double @asinh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @asinhf(float %f) +} + +define double @asinh_test2(float %f) nounwind readnone { +; CHECK: asinh_test2 + %conv = fpext float %f to double + %call = call double @asinh(double %conv) + ret double %call +; CHECK: call double @asinh(double %conv) +} + +define float @atan_test(float %f) nounwind readnone { +; CHECK: atan_test + %conv = fpext float %f to double + %call = call double @atan(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @atanf(float %f) +} + +define double @atan_test2(float %f) nounwind readnone { +; CHECK: atan_test2 + %conv = fpext float %f to double + %call = call double @atan(double %conv) + ret double %call +; CHECK: call double @atan(double %conv) +} +define float @atanh_test(float %f) nounwind readnone { +; CHECK: atanh_test + %conv = fpext float %f to double + %call = call double @atanh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @atanhf(float %f) +} + +define double @atanh_test2(float %f) nounwind readnone { +; CHECK: atanh_test2 + %conv = fpext float %f to double + %call = call double @atanh(double %conv) + ret double %call +; CHECK: call double @atanh(double %conv) +} +define float @cbrt_test(float %f) nounwind readnone { +; CHECK: cbrt_test + %conv = fpext float %f to double + %call = call double @cbrt(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @cbrtf(float %f) +} + +define double @cbrt_test2(float %f) nounwind readnone { +; CHECK: cbrt_test2 + %conv = fpext float %f to double + %call = call double @cbrt(double %conv) + ret double %call +; CHECK: call double @cbrt(double %conv) +} +define float @exp_test(float %f) nounwind readnone { +; CHECK: exp_test + %conv = fpext float %f to double + %call = call double @exp(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @expf(float %f) +} + +define double @exp_test2(float %f) nounwind readnone { +; CHECK: exp_test2 + %conv = fpext float %f to double + %call = call double @exp(double %conv) + ret double %call +; CHECK: call double @exp(double %conv) +} +define float @expm1_test(float %f) nounwind readnone { +; CHECK: expm1_test + %conv = fpext float %f to double + %call = call double @expm1(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @expm1f(float %f) +} + +define double @expm1_test2(float %f) nounwind readnone { +; CHECK: expm1_test2 + %conv = fpext float %f to double + %call = call double @expm1(double %conv) + ret double %call +; CHECK: call double @expm1(double %conv) +} +define float @exp10_test(float %f) nounwind readnone { +; CHECK: exp10_test + %conv = fpext float %f to double + %call = call double @exp10(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @exp10f(float %f) +} + +define double @exp10_test2(float %f) nounwind readnone { +; CHECK: exp10_test2 + %conv = fpext float %f to double + %call = call double @exp10(double %conv) + ret double %call +; CHECK: call double @exp10(double %conv) +} +define float @log_test(float %f) nounwind readnone { +; CHECK: log_test + %conv = fpext float %f to double + %call = call double @log(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @logf(float %f) +} + +define double @log_test2(float %f) 
nounwind readnone { +; CHECK: log_test2 + %conv = fpext float %f to double + %call = call double @log(double %conv) + ret double %call +; CHECK: call double @log(double %conv) +} +define float @log10_test(float %f) nounwind readnone { +; CHECK: log10_test + %conv = fpext float %f to double + %call = call double @log10(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log10f(float %f) +} + +define double @log10_test2(float %f) nounwind readnone { +; CHECK: log10_test2 + %conv = fpext float %f to double + %call = call double @log10(double %conv) + ret double %call +; CHECK: call double @log10(double %conv) +} +define float @log1p_test(float %f) nounwind readnone { +; CHECK: log1p_test + %conv = fpext float %f to double + %call = call double @log1p(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log1pf(float %f) +} + +define double @log1p_test2(float %f) nounwind readnone { +; CHECK: log1p_test2 + %conv = fpext float %f to double + %call = call double @log1p(double %conv) + ret double %call +; CHECK: call double @log1p(double %conv) +} +define float @log2_test(float %f) nounwind readnone { +; CHECK: log2_test + %conv = fpext float %f to double + %call = call double @log2(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @log2f(float %f) +} + +define double @log2_test2(float %f) nounwind readnone { +; CHECK: log2_test2 + %conv = fpext float %f to double + %call = call double @log2(double %conv) + ret double %call +; CHECK: call double @log2(double %conv) +} +define float @logb_test(float %f) nounwind readnone { +; CHECK: logb_test + %conv = fpext float %f to double + %call = call double @logb(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @logbf(float %f) +} + +define double @logb_test2(float %f) nounwind readnone { +; CHECK: logb_test2 + %conv = fpext float %f to double + %call = call double @logb(double %conv) + ret double %call +; CHECK: call double @logb(double %conv) +} +define float @sin_test(float %f) nounwind readnone { +; CHECK: sin_test + %conv = fpext float %f to double + %call = call double @sin(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @sinf(float %f) +} + +define double @sin_test2(float %f) nounwind readnone { +; CHECK: sin_test2 + %conv = fpext float %f to double + %call = call double @sin(double %conv) + ret double %call +; CHECK: call double @sin(double %conv) +} +define float @sqrt_test(float %f) nounwind readnone { +; CHECK: sqrt_test + %conv = fpext float %f to double + %call = call double @sqrt(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @sqrtf(float %f) +} + +define double @sqrt_test2(float %f) nounwind readnone { +; CHECK: sqrt_test2 + %conv = fpext float %f to double + %call = call double @sqrt(double %conv) + ret double %call +; CHECK: call double @sqrt(double %conv) +} +define float @tan_test(float %f) nounwind readnone { +; CHECK: tan_test + %conv = fpext float %f to double + %call = call double @tan(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @tanf(float %f) +} + +define double @tan_test2(float %f) nounwind readnone { +; CHECK: tan_test2 + %conv = fpext float %f to double + %call = call double @tan(double %conv) + ret double %call +; CHECK: call double @tan(double %conv) +} +define float @tanh_test(float %f) nounwind readnone { +; 
CHECK: tanh_test + %conv = fpext float %f to double + %call = call double @tanh(double %conv) + %conv1 = fptrunc double %call to float + ret float %conv1 +; CHECK: call float @tanhf(float %f) +} + +define double @tanh_test2(float %f) nounwind readnone { +; CHECK: tanh_test2 + %conv = fpext float %f to double + %call = call double @tanh(double %conv) + ret double %call +; CHECK: call double @tanh(double %conv) +} + +declare double @tanh(double) nounwind readnone +declare double @tan(double) nounwind readnone +declare double @sqrt(double) nounwind readnone +declare double @sin(double) nounwind readnone +declare double @log2(double) nounwind readnone +declare double @log1p(double) nounwind readnone +declare double @log10(double) nounwind readnone +declare double @log(double) nounwind readnone +declare double @logb(double) nounwind readnone +declare double @exp10(double) nounwind readnone +declare double @expm1(double) nounwind readnone +declare double @exp(double) nounwind readnone +declare double @cbrt(double) nounwind readnone +declare double @atanh(double) nounwind readnone +declare double @atan(double) nounwind readnone +declare double @acos(double) nounwind readnone +declare double @acosh(double) nounwind readnone +declare double @asin(double) nounwind readnone +declare double @asinh(double) nounwind readnone diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll new file mode 100644 index 0000000000..aecb887beb --- /dev/null +++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll @@ -0,0 +1,179 @@ +; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @test1(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @ceil(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test1 +; CHECK-NEXT: %ceilf = call float @ceilf(float %x) +; CHECK-NEXT: fcmp oeq float %ceilf, %y +} + +define i32 @test2(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @fabs(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test2 +; CHECK-NEXT: %fabsf = call float @fabsf(float %x) +; CHECK-NEXT: fcmp oeq float %fabsf, %y +} + +define i32 @test3(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @floor(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test3 +; CHECK-NEXT: %floorf = call float @floorf(float %x) +; CHECK-NEXT: fcmp oeq float %floorf, %y +} + +define i32 @test4(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @nearbyint(double %1) nounwind + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test4 +; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x) +; CHECK-NEXT: fcmp oeq float %nearbyintf, %y +} + +define i32 @test5(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @rint(double %1) nounwind + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 
to i32 + ret i32 %5 +; CHECK: @test5 +; CHECK-NEXT: %rintf = call float @rintf(float %x) +; CHECK-NEXT: fcmp oeq float %rintf, %y +} + +define i32 @test6(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @round(double %1) nounwind readnone + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test6 +; CHECK-NEXT: %roundf = call float @roundf(float %x) +; CHECK-NEXT: fcmp oeq float %roundf, %y +} + +define i32 @test7(float %x, float %y) nounwind uwtable { + %1 = fpext float %x to double + %2 = call double @trunc(double %1) nounwind + %3 = fpext float %y to double + %4 = fcmp oeq double %2, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test7 +; CHECK-NEXT: %truncf = call float @truncf(float %x) +; CHECK-NEXT: fcmp oeq float %truncf, %y +} + +define i32 @test8(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @ceil(double %2) nounwind readnone + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test8 +; CHECK-NEXT: %ceilf = call float @ceilf(float %x) +; CHECK-NEXT: fcmp oeq float %ceilf, %y +} + +define i32 @test9(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @fabs(double %2) nounwind readnone + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test9 +; CHECK-NEXT: %fabsf = call float @fabsf(float %x) +; CHECK-NEXT: fcmp oeq float %fabsf, %y +} + +define i32 @test10(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @floor(double %2) nounwind readnone + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test10 +; CHECK-NEXT: %floorf = call float @floorf(float %x) +; CHECK-NEXT: fcmp oeq float %floorf, %y +} + +define i32 @test11(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @nearbyint(double %2) nounwind + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test11 +; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x) +; CHECK-NEXT: fcmp oeq float %nearbyintf, %y +} + +define i32 @test12(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @rint(double %2) nounwind + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test12 +; CHECK-NEXT: %rintf = call float @rintf(float %x) +; CHECK-NEXT: fcmp oeq float %rintf, %y +} + +define i32 @test13(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @round(double %2) nounwind readnone + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test13 +; CHECK-NEXT: %roundf = call float @roundf(float %x) +; CHECK-NEXT: fcmp oeq float %roundf, %y +} + +define i32 @test14(float %x, float %y) nounwind uwtable { + %1 = fpext float %y to double + %2 = fpext float %x to double + %3 = call double @trunc(double %2) nounwind + %4 = fcmp oeq double %1, %3 + %5 = zext i1 %4 to i32 + ret i32 %5 +; CHECK: @test14 +; CHECK-NEXT: %truncf = call float @truncf(float %x) +; CHECK-NEXT: fcmp oeq float %truncf, %y +} + +declare double @fabs(double) nounwind readnone +declare double @ceil(double) nounwind readnone +declare double @floor(double) nounwind readnone +declare double 
@nearbyint(double) nounwind readnone +declare double @rint(double) nounwind readnone +declare double @round(double) nounwind readnone +declare double @trunc(double) nounwind readnone diff --git a/tools/gold/Makefile b/tools/gold/Makefile index 79f86a85c3..31812e1f8c 100644 --- a/tools/gold/Makefile +++ b/tools/gold/Makefile @@ -28,6 +28,8 @@ include $(LEVEL)/Makefile.config # Because off_t is used in the public API, the largefile parts are required for # ABI compatibility. CXXFLAGS += -I$(BINUTILS_INCDIR) -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -CXXFLAGS += -L$(SharedLibDir)/$(SharedPrefix) -lLTO +LDFLAGS += -L$(SharedLibDir)/$(SharedPrefix) include $(LEVEL)/Makefile.common + +LIBS += -lLTO diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt index a5d2e61ea2..68cb921028 100644 --- a/tools/lli/CMakeLists.txt +++ b/tools/lli/CMakeLists.txt @@ -19,4 +19,6 @@ endif( LLVM_USE_INTEL_JITEVENTS ) add_llvm_tool(lli lli.cpp + RecordingMemoryManager.cpp + RemoteTarget.cpp ) diff --git a/tools/lli/RecordingMemoryManager.cpp b/tools/lli/RecordingMemoryManager.cpp new file mode 100644 index 0000000000..9e1cff5527 --- /dev/null +++ b/tools/lli/RecordingMemoryManager.cpp @@ -0,0 +1,87 @@ +//===- RecordingMemoryManager.cpp - Recording memory manager --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This memory manager allocates local storage and keeps a record of each +// allocation. Iterators are provided for all data and code allocations. +// +//===----------------------------------------------------------------------===// + +#include "RecordingMemoryManager.h" +using namespace llvm; + +uint8_t *RecordingMemoryManager:: +allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + // The recording memory manager is just a local copy of the remote target. + // The alignment requirement is just stored here for later use. Regular + // heap storage is sufficient here. + void *Addr = malloc(Size); + assert(Addr && "malloc() failure!"); + sys::MemoryBlock Block(Addr, Size); + AllocatedCodeMem.push_back(Allocation(Block, Alignment)); + return (uint8_t*)Addr; +} + +uint8_t *RecordingMemoryManager:: +allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + // The recording memory manager is just a local copy of the remote target. + // The alignment requirement is just stored here for later use. Regular + // heap storage is sufficient here. 
+ void *Addr = malloc(Size); + assert(Addr && "malloc() failure!"); + sys::MemoryBlock Block(Addr, Size); + AllocatedDataMem.push_back(Allocation(Block, Alignment)); + return (uint8_t*)Addr; +} +void RecordingMemoryManager::setMemoryWritable() { llvm_unreachable("Unexpected!"); } +void RecordingMemoryManager::setMemoryExecutable() { llvm_unreachable("Unexpected!"); } +void RecordingMemoryManager::setPoisonMemory(bool poison) { llvm_unreachable("Unexpected!"); } +void RecordingMemoryManager::AllocateGOT() { llvm_unreachable("Unexpected!"); } +uint8_t *RecordingMemoryManager::getGOTBase() const { + llvm_unreachable("Unexpected!"); + return 0; +} +uint8_t *RecordingMemoryManager::startFunctionBody(const Function *F, uintptr_t &ActualSize){ + llvm_unreachable("Unexpected!"); + return 0; +} +uint8_t *RecordingMemoryManager::allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment) { + llvm_unreachable("Unexpected!"); + return 0; +} +void RecordingMemoryManager::endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd) { + llvm_unreachable("Unexpected!"); +} +uint8_t *RecordingMemoryManager::allocateSpace(intptr_t Size, unsigned Alignment) { + llvm_unreachable("Unexpected!"); + return 0; +} +uint8_t *RecordingMemoryManager::allocateGlobal(uintptr_t Size, unsigned Alignment) { + llvm_unreachable("Unexpected!"); + return 0; +} +void RecordingMemoryManager::deallocateFunctionBody(void *Body) { + llvm_unreachable("Unexpected!"); +} +uint8_t* RecordingMemoryManager::startExceptionTable(const Function* F, uintptr_t &ActualSize) { + llvm_unreachable("Unexpected!"); + return 0; +} +void RecordingMemoryManager::endExceptionTable(const Function *F, uint8_t *TableStart, + uint8_t *TableEnd, uint8_t* FrameRegister) { + llvm_unreachable("Unexpected!"); +} +void RecordingMemoryManager::deallocateExceptionTable(void *ET) { + llvm_unreachable("Unexpected!"); +} +void *RecordingMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + return NULL; +} diff --git a/tools/lli/RecordingMemoryManager.h b/tools/lli/RecordingMemoryManager.h new file mode 100644 index 0000000000..1590235a79 --- /dev/null +++ b/tools/lli/RecordingMemoryManager.h @@ -0,0 +1,78 @@ +//===- RecordingMemoryManager.h - LLI MCJIT recording memory manager ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This memory manager allocates local storage and keeps a record of each +// allocation. Iterators are provided for all data and code allocations. 
+// +//===----------------------------------------------------------------------===// + +#ifndef RECORDINGMEMORYMANAGER_H +#define RECORDINGMEMORYMANAGER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include <utility> + +namespace llvm { + +class RecordingMemoryManager : public JITMemoryManager { +public: + typedef std::pair<sys::MemoryBlock, unsigned> Allocation; + +private: + SmallVector<Allocation, 16> AllocatedDataMem; + SmallVector<Allocation, 16> AllocatedCodeMem; + +public: + RecordingMemoryManager() {} + virtual ~RecordingMemoryManager() {} + + typedef SmallVectorImpl<Allocation>::const_iterator const_data_iterator; + typedef SmallVectorImpl<Allocation>::const_iterator const_code_iterator; + + const_data_iterator data_begin() const { return AllocatedDataMem.begin(); } + const_data_iterator data_end() const { return AllocatedDataMem.end(); } + const_code_iterator code_begin() const { return AllocatedCodeMem.begin(); } + const_code_iterator code_end() const { return AllocatedCodeMem.end(); } + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + // The following obsolete JITMemoryManager calls are stubbed out for + // this model. + void setMemoryWritable(); + void setMemoryExecutable(); + void setPoisonMemory(bool poison); + void AllocateGOT(); + uint8_t *getGOTBase() const; + uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize); + uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment); + void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd); + uint8_t *allocateSpace(intptr_t Size, unsigned Alignment); + uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment); + void deallocateFunctionBody(void *Body); + uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize); + void endExceptionTable(const Function *F, uint8_t *TableStart, + uint8_t *TableEnd, uint8_t* FrameRegister); + void deallocateExceptionTable(void *ET); + +}; + +} // end namespace llvm + +#endif diff --git a/tools/lli/RemoteTarget.cpp b/tools/lli/RemoteTarget.cpp new file mode 100644 index 0000000000..918f1572e3 --- /dev/null +++ b/tools/lli/RemoteTarget.cpp @@ -0,0 +1,61 @@ +//===- RemoteTarget.cpp - LLVM Remote process JIT execution --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the RemoteTarget class which executes JITed code in a +// separate address range from where it was built. +// +//===----------------------------------------------------------------------===// + +#include "RemoteTarget.h" +#include <llvm/ADT/StringRef.h> +#include <llvm/Support/DataTypes.h> +#include <llvm/Support/Memory.h> +#include <stdlib.h> +#include <string> +using namespace llvm; + +bool RemoteTarget::allocateSpace(size_t Size, unsigned Alignment, + uint64_t &Address) { + sys::MemoryBlock *Prev = Allocations.size() ? 
&Allocations.back() : NULL;
+  sys::MemoryBlock Mem = sys::Memory::AllocateRWX(Size, Prev, &ErrorMsg);
+  if (Mem.base() == NULL)
+    return true;
+  if ((uintptr_t)Mem.base() % Alignment) {
+    ErrorMsg = "unable to allocate sufficiently aligned memory";
+    return true;
+  }
+  Address = reinterpret_cast<uint64_t>(Mem.base());
+  return false;
+}

+bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) {
+  memcpy ((void*)Address, Data, Size);
+  return false;
+}
+
+bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) {
+  memcpy ((void*)Address, Data, Size);
+  // Code, unlike plain data, must be marked executable before it can run.
+  sys::MemoryBlock Mem((void*)Address, Size);
+  sys::Memory::setExecutable(Mem, &ErrorMsg);
+  return false;
+}
+
+bool RemoteTarget::executeCode(uint64_t Address, int &RetVal) {
+  int (*fn)(void) = (int(*)(void))Address;
+  RetVal = fn();
+  return false;
+}
+
+void RemoteTarget::create() {
+}
+
+void RemoteTarget::stop() {
+  for (unsigned i = 0, e = Allocations.size(); i != e; ++i)
+    sys::Memory::ReleaseRWX(Allocations[i]);
+}
diff --git a/tools/lli/RemoteTarget.h b/tools/lli/RemoteTarget.h
new file mode 100644
index 0000000000..c5845266d6
--- /dev/null
+++ b/tools/lli/RemoteTarget.h
@@ -0,0 +1,101 @@
+//===- RemoteTarget.h - LLVM Remote process JIT execution ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of the RemoteTarget class which executes JITed code in a
+// separate address range from where it was built.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef REMOTEPROCESS_H
+#define REMOTEPROCESS_H
+
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/DataTypes.h>
+#include <llvm/Support/Memory.h>
+#include <stdlib.h>
+#include <string>
+
+namespace llvm {
+
+class RemoteTarget {
+  std::string ErrorMsg;
+  bool IsRunning;
+
+  SmallVector<sys::MemoryBlock, 16> Allocations;
+
+public:
+  StringRef getErrorMsg() const { return ErrorMsg; }
+
+  /// Allocate space in the remote target address space.
+  ///
+  /// @param      Size      Amount of space, in bytes, to allocate.
+  /// @param      Alignment Required minimum alignment for allocated space.
+  /// @param[out] Address   Remote address of the allocated memory.
+  ///
+  /// @returns False on success. On failure, ErrorMsg is updated with
+  ///          descriptive text of the encountered error.
+  bool allocateSpace(size_t Size, unsigned Alignment, uint64_t &Address);
+
+  /// Load data into the target address space.
+  ///
+  /// @param      Address   Destination address in the target process.
+  /// @param      Data      Source address in the host process.
+  /// @param      Size      Number of bytes to copy.
+  ///
+  /// @returns False on success. On failure, ErrorMsg is updated with
+  ///          descriptive text of the encountered error.
+  bool loadData(uint64_t Address, const void *Data, size_t Size);
+
+  /// Load code into the target address space and prepare it for execution.
+  ///
+  /// @param      Address   Destination address in the target process.
+  /// @param      Data      Source address in the host process.
+  /// @param      Size      Number of bytes to copy.
+  ///
+  /// @returns False on success. On failure, ErrorMsg is updated with
+  ///          descriptive text of the encountered error.
+  bool loadCode(uint64_t Address, const void *Data, size_t Size);
+
+  /// Execute code in the target process. The called function is required
+  /// to be of signature int "(*)(void)".
+  ///
+  /// @param      Address   Address of the loaded function in the target
+  ///                       process.
+  /// @param[out] RetVal    The integer return value of the called function.
+  ///
+  /// @returns False on success. On failure, ErrorMsg is updated with
+  ///          descriptive text of the encountered error.
+  bool executeCode(uint64_t Address, int &RetVal);
+
+  /// Minimum alignment for memory permissions. Used to separate code and
+  /// data regions to make sure data doesn't get marked as code or vice
+  /// versa.
+  ///
+  /// @returns Page alignment return value. Default of 4k.
+  unsigned getPageAlignment() { return 4096; }
+
+  /// Start the remote process.
+  void create();
+
+  /// Terminate the remote process.
+  void stop();
+
+  RemoteTarget() : ErrorMsg(""), IsRunning(false) {}
+  ~RemoteTarget() { if (IsRunning) stop(); }
+
+private:
+  // Main processing function for the remote target process. Command messages
+  // are received on file descriptor CmdFD and responses come back on OutFD.
+  static void doRemoteTargeting(int CmdFD, int OutFD);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index b6c9299c65..4004b6c4d4 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -13,6 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "lli"
+#include "RecordingMemoryManager.h"
+#include "RemoteTarget.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
@@ -32,9 +35,11 @@
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/Memory.h"
 #include <cerrno>
@@ -73,6 +78,13 @@ namespace {
     "use-mcjit", cl::desc("Enable use of the MC-based JIT (if available)"),
     cl::init(false));
 
+  // The MCJIT supports building for a target address space separate from
+  // the JIT compilation process. Use a forked process and a copying
+  // memory manager with IPC to execute using this functionality.
+  cl::opt<bool> RemoteMCJIT("remote-mcjit",
+    cl::desc("Execute MCJIT'ed code in a separate process."),
+    cl::init(false));
+
   // Determine optimization level.
   cl::opt<char>
   OptLevel("O",
@@ -372,6 +384,79 @@ LLIMCJITMemoryManager::~LLIMCJITMemoryManager() {
     free(AllocatedDataMem[i].base());
 }
 
+
+void layoutRemoteTargetMemory(RemoteTarget *T, RecordingMemoryManager *JMM) {
+  // Lay out our sections in order, with all the code sections first, then
+  // all the data sections.
+  uint64_t CurOffset = 0;
+  unsigned MaxAlign = T->getPageAlignment();
+  SmallVector<std::pair<const void*, uint64_t>, 16> Offsets;
+  SmallVector<unsigned, 16> Sizes;
+  for (RecordingMemoryManager::const_code_iterator I = JMM->code_begin(),
+         E = JMM->code_end();
+       I != E; ++I) {
+    DEBUG(dbgs() << "code region: size " << I->first.size()
+                 << ", alignment " << I->second << "\n");
+    // Align the current offset up to whatever is needed for the next
+    // section.
+    unsigned Align = I->second;
+    CurOffset = (CurOffset + Align - 1) / Align * Align;
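+    // Worked example with illustrative values: if CurOffset is 10 and the
+    // next section needs Align == 8, then (10 + 8 - 1) / 8 * 8 == 16, the
+    // next 8-byte boundary; an already-aligned offset is unchanged, e.g.
+    // (16 + 8 - 1) / 8 * 8 == 16.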
+    // Save off the address of the new section and allocate its space.
+    Offsets.push_back(std::pair<const void*,uint64_t>(I->first.base(), CurOffset));
+    Sizes.push_back(I->first.size());
+    CurOffset += I->first.size();
+  }
+  // Adjust to keep code and data aligned on separate pages.
+  CurOffset = (CurOffset + MaxAlign - 1) / MaxAlign * MaxAlign;
+  unsigned FirstDataIndex = Offsets.size();
+  for (RecordingMemoryManager::const_data_iterator I = JMM->data_begin(),
+         E = JMM->data_end();
+       I != E; ++I) {
+    DEBUG(dbgs() << "data region: size " << I->first.size()
+                 << ", alignment " << I->second << "\n");
+    // Align the current offset up to whatever is needed for the next
+    // section.
+    unsigned Align = I->second;
+    CurOffset = (CurOffset + Align - 1) / Align * Align;
+    // Save off the address of the new section and allocate its space.
+    Offsets.push_back(std::pair<const void*,uint64_t>(I->first.base(), CurOffset));
+    Sizes.push_back(I->first.size());
+    CurOffset += I->first.size();
+  }
+
+  // Allocate space in the remote target.
+  uint64_t RemoteAddr;
+  if (T->allocateSpace(CurOffset, MaxAlign, RemoteAddr))
+    report_fatal_error(T->getErrorMsg());
+  // Map the section addresses so relocations will get updated in the local
+  // copies of the sections.
+  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+    uint64_t Addr = RemoteAddr + Offsets[i].second;
+    EE->mapSectionAddress(const_cast<void*>(Offsets[i].first), Addr);
+
+    DEBUG(dbgs() << "  Mapping local: " << Offsets[i].first
+                 << " to remote: " << format("%#018x", Addr) << "\n");
+
+  }
+  // Now load it all to the target.
+  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+    uint64_t Addr = RemoteAddr + Offsets[i].second;
+
+    if (i < FirstDataIndex) {
+      T->loadCode(Addr, Offsets[i].first, Sizes[i]);
+
+      DEBUG(dbgs() << "  loading code: " << Offsets[i].first
+                   << " to remote: " << format("%#018x", Addr) << "\n");
+    } else {
+      T->loadData(Addr, Offsets[i].first, Sizes[i]);
+
+      DEBUG(dbgs() << "  loading data: " << Offsets[i].first
+                   << " to remote: " << format("%#018x", Addr) << "\n");
+    }
+
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // main Driver function
 //
@@ -428,12 +513,19 @@ int main(int argc, char **argv, char * const *envp) {
     Mod->setTargetTriple(Triple::normalize(TargetTriple));
 
   // Enable MCJIT if desired.
-  LLIMCJITMemoryManager *JMM = 0;
+  JITMemoryManager *JMM = 0;
   if (UseMCJIT && !ForceInterpreter) {
     builder.setUseMCJIT(true);
-    JMM = new LLIMCJITMemoryManager();
+    if (RemoteMCJIT)
+      JMM = new RecordingMemoryManager();
+    else
+      JMM = new LLIMCJITMemoryManager();
     builder.setJITMemoryManager(JMM);
   } else {
+    if (RemoteMCJIT) {
+      errs() << "error: Remote process execution requires -use-mcjit\n";
+      exit(1);
+    }
     builder.setJITMemoryManager(ForceInterpreter ? 0 :
                                 JITMemoryManager::CreateDefaultMemManager());
   }
@@ -451,11 +543,14 @@ int main(int argc, char **argv, char * const *envp) {
   }
   builder.setOptLevel(OLvl);
 
-  TargetOptions Options;
-  Options.JITExceptionHandling = EnableJITExceptionHandling;
-  Options.JITEmitDebugInfo = EmitJitDebugInfo;
-  Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
-  builder.setTargetOptions(Options);
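+  // Usage sketch (illustrative, assuming a bitcode file foo.bc): the remote
+  // mode is exercised with both flags together,
+  //   lli -use-mcjit -remote-mcjit foo.bc
+  // so code is compiled in-process, copied into the separately allocated
+  // target region, and executed there.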
+  // Remote target execution doesn't handle EH or debug registration.
+  if (!RemoteMCJIT) {
+    TargetOptions Options;
+    Options.JITExceptionHandling = EnableJITExceptionHandling;
+    Options.JITEmitDebugInfo = EmitJitDebugInfo;
+    Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
+    builder.setTargetOptions(Options);
+  }
 
   EE = builder.create();
   if (!EE) {
@@ -466,10 +561,6 @@ int main(int argc, char **argv, char * const *envp) {
     exit(1);
   }
 
-  // Clear instruction cache before code will be executed.
-  if (JMM)
-    JMM->invalidateInstructionCache();
-
   // The following functions have no effect if their respective profiling
   // support wasn't enabled in the build configuration.
   EE->RegisterJITEventListener(
@@ -477,6 +568,10 @@ int main(int argc, char **argv, char * const *envp) {
   EE->RegisterJITEventListener(
     JITEventListener::createIntelJITEventListener());
 
+  if (!NoLazyCompilation && RemoteMCJIT) {
+    errs() << "warning: remote mcjit does not support lazy compilation\n";
+    NoLazyCompilation = true;
+  }
   EE->DisableLazyCompilation(NoLazyCompilation);
 
   // If the user specifically requested an argv[0] to pass into the program,
@@ -513,8 +608,13 @@ int main(int argc, char **argv, char * const *envp) {
   // Reset errno to zero on entry to main.
   errno = 0;
 
+  // Remote target MCJIT doesn't (yet) support static constructors. No reason
+  // it couldn't. This is a limitation of the LLI implementation, not the
+  // MCJIT itself. FIXME.
+  //
   // Run static constructors.
-  EE->runStaticConstructorsDestructors(false);
+  if (!RemoteMCJIT)
+    EE->runStaticConstructorsDestructors(false);
 
   if (NoLazyCompilation) {
     for (Module::iterator I = Mod->begin(), E = Mod->end(); I != E; ++I) {
@@ -524,24 +624,66 @@ int main(int argc, char **argv, char * const *envp) {
     }
   }
 
-  // Run main.
-  int Result = EE->runFunctionAsMain(EntryFn, InputArgv, envp);
+  int Result;
+  if (RemoteMCJIT) {
+    RecordingMemoryManager *MM = static_cast<RecordingMemoryManager*>(JMM);
+    // Everything is prepared now, so lay out our program for the target
+    // address space, assign the section addresses to resolve any relocations,
+    // and send it to the target.
+    RemoteTarget Target;
+    Target.create();
+
+    // Ask for a pointer to the entry function. This triggers the actual
+    // compilation.
+    (void)EE->getPointerToFunction(EntryFn);
+
+    // Enough has been compiled to execute the entry function now, so
+    // lay out the target memory.
+    layoutRemoteTargetMemory(&Target, MM);
+
+    // Since we're executing in a (at least simulated) remote address space,
+    // we can't use the ExecutionEngine::runFunctionAsMain(). We have to
+    // grab the function address directly here and tell the remote target
+    // to execute the function.
+    // FIXME: argv and envp handling.
+    uint64_t Entry = (uint64_t)EE->getPointerToFunction(EntryFn);
+
+    DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at "
+                 << format("%#18x", Entry) << "\n");
+
+    if (Target.executeCode(Entry, Result))
+      errs() << "ERROR: " << Target.getErrorMsg() << "\n";
+
+    Target.stop();
+  } else {
+    // Clear instruction cache before code will be executed.
+    if (JMM)
+      static_cast<LLIMCJITMemoryManager*>(JMM)->invalidateInstructionCache();
 
-  // Run static destructors.
-  EE->runStaticConstructorsDestructors(true);
+    // Run main.
+    Result = EE->runFunctionAsMain(EntryFn, InputArgv, envp);
+  }
 
-  // If the program didn't call exit explicitly, we should call it now.
-  // This ensures that any atexit handlers get called correctly.
- if (Function *ExitF = dyn_cast<Function>(Exit)) { - std::vector<GenericValue> Args; - GenericValue ResultGV; - ResultGV.IntVal = APInt(32, Result); - Args.push_back(ResultGV); - EE->runFunction(ExitF, Args); - errs() << "ERROR: exit(" << Result << ") returned!\n"; - abort(); - } else { - errs() << "ERROR: exit defined with wrong prototype!\n"; - abort(); + // Like static constructors, the remote target MCJIT support doesn't handle + // this yet. It could. FIXME. + if (!RemoteMCJIT) { + // Run static destructors. + EE->runStaticConstructorsDestructors(true); + + // If the program didn't call exit explicitly, we should call it now. + // This ensures that any atexit handlers get called correctly. + if (Function *ExitF = dyn_cast<Function>(Exit)) { + std::vector<GenericValue> Args; + GenericValue ResultGV; + ResultGV.IntVal = APInt(32, Result); + Args.push_back(ResultGV); + EE->runFunction(ExitF, Args); + errs() << "ERROR: exit(" << Result << ") returned!\n"; + abort(); + } else { + errs() << "ERROR: exit defined with wrong prototype!\n"; + abort(); + } } + return Result; } diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index d6300878d5..8109ca4d5b 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -40,7 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/system_error.h" -#include <cstdio> + #include <map> #include <algorithm> using namespace llvm; @@ -463,11 +463,11 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) { } static void PrintSize(double Bits) { - fprintf(stderr, "%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32)); + outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32)); } static void PrintSize(uint64_t Bits) { - fprintf(stderr, "%lub/%.2fB/%luW", (unsigned long)Bits, - (double)Bits/8, (unsigned long)(Bits/32)); + outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits, + (double)Bits/8, (unsigned long)(Bits/32)); } @@ -483,7 +483,7 @@ static int AnalyzeBitcode() { if (MemBuf->getBufferSize() & 3) return Error("Bitcode stream should be a multiple of 4 bytes in length"); - const unsigned char *BufPtr = (unsigned char *)MemBuf->getBufferStart(); + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); // If we have a wrapper header, parse it and ignore the non-bc file contents. 
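A minimal standalone sketch of the llvm::format idiom these hunks switch to (the format strings come from PrintSize in this patch; the small main around them is hypothetical and would link against LLVMSupport):

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    unsigned long Bits = 1000;
    // format() builds a deferred printf-style object; the raw_ostream
    // renders it on insertion, so output follows wherever outs() points
    // rather than being hard-wired to stderr as with fprintf.
    outs() << format("%lub/%.2fB/%luW", Bits, (double)Bits/8, Bits/32) << "\n";
    return 0;
  }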
@@ -556,7 +556,7 @@ static int AnalyzeBitcode() { PrintSize(Stats.NumBits); outs() << "\n"; double pct = (Stats.NumBits * 100.0) / BufferSizeBits; - errs() << " Percent of file: " << format("%2.4f%%", pct) << "\n"; + outs() << " Percent of file: " << format("%2.4f%%", pct) << "\n"; if (Stats.NumInstances > 1) { outs() << " Average Size: "; PrintSize(Stats.NumBits/(double)Stats.NumInstances); @@ -588,24 +588,26 @@ static int AnalyzeBitcode() { std::reverse(FreqPairs.begin(), FreqPairs.end()); outs() << "\tRecord Histogram:\n"; - fprintf(stderr, "\t\t Count # Bits %% Abv Record Kind\n"); + outs() << "\t\t Count # Bits %% Abv Record Kind\n"; for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; - fprintf(stderr, "\t\t%7d %9lu ", RecStats.NumInstances, - (unsigned long)RecStats.TotalBits); + outs() << format("\t\t%7d %9lu", + RecStats.NumInstances, + (unsigned long)RecStats.TotalBits); if (RecStats.NumAbbrev) - fprintf(stderr, "%7.2f ", - (double)RecStats.NumAbbrev/RecStats.NumInstances*100); + outs() << + format("%7.2f ", + (double)RecStats.NumAbbrev/RecStats.NumInstances*100); else - fprintf(stderr, " "); + outs() << " "; if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first, StreamFile)) - fprintf(stderr, "%s\n", CodeName); + outs() << CodeName << "\n"; else - fprintf(stderr, "UnknownCode%d\n", FreqPairs[i].second); + outs() << "UnknownCode" << FreqPairs[i].second << "\n"; } outs() << "\n"; diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp index ec0b4aeb63..38c3a1e76f 100644 --- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp +++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -44,6 +44,18 @@ PrintFunctions("functions", cl::init(false), cl::desc("Print function names as well as line information " "for a given address")); +static cl::opt<bool> +PrintInlining("inlining", cl::init(false), + cl::desc("Print all inlined frames for a given address")); + +static void PrintDILineInfo(DILineInfo dli) { + if (PrintFunctions) + outs() << (dli.getFunctionName() ? dli.getFunctionName() : "<unknown>") + << "\n"; + outs() << (dli.getFileName() ? dli.getFileName() : "<unknown>") << ':' + << dli.getLine() << ':' << dli.getColumn() << '\n'; +} + static void DumpInput(const StringRef &Filename) { OwningPtr<MemoryBuffer> Buff; @@ -59,6 +71,7 @@ static void DumpInput(const StringRef &Filename) { StringRef DebugLineSection; StringRef DebugArangesSection; StringRef DebugStringSection; + StringRef DebugRangesSection; error_code ec; for (section_iterator i = Obj->begin_sections(), @@ -82,6 +95,8 @@ static void DumpInput(const StringRef &Filename) { DebugArangesSection = data; else if (name == "debug_str") DebugStringSection = data; + else if (name == "debug_ranges") + DebugRangesSection = data; } OwningPtr<DIContext> dictx(DIContext::getDWARFContext(/*FIXME*/true, @@ -89,7 +104,8 @@ static void DumpInput(const StringRef &Filename) { DebugAbbrevSection, DebugArangesSection, DebugLineSection, - DebugStringSection)); + DebugStringSection, + DebugRangesSection)); if (Address == -1ULL) { outs() << Filename << ":\tfile format " << Obj->getFileFormatName() << "\n\n"; @@ -97,16 +113,27 @@ static void DumpInput(const StringRef &Filename) { dictx->dump(outs()); } else { // Print line info for the specified address. 
- int spec_flags = DILineInfoSpecifier::FileLineInfo | - DILineInfoSpecifier::AbsoluteFilePath; - if (PrintFunctions) - spec_flags |= DILineInfoSpecifier::FunctionName; - DILineInfo dli = dictx->getLineInfoForAddress(Address, spec_flags); + int SpecFlags = DILineInfoSpecifier::FileLineInfo | + DILineInfoSpecifier::AbsoluteFilePath; if (PrintFunctions) - outs() << (dli.getFunctionName() ? dli.getFunctionName() : "<unknown>") - << "\n"; - outs() << (dli.getFileName() ? dli.getFileName() : "<unknown>") << ':' - << dli.getLine() << ':' << dli.getColumn() << '\n'; + SpecFlags |= DILineInfoSpecifier::FunctionName; + if (PrintInlining) { + DIInliningInfo InliningInfo = dictx->getInliningInfoForAddress( + Address, SpecFlags); + uint32_t n = InliningInfo.getNumberOfFrames(); + if (n == 0) { + // Print one empty debug line info in any case. + PrintDILineInfo(DILineInfo()); + } else { + for (uint32_t i = 0; i < n; i++) { + DILineInfo dli = InliningInfo.getFrame(i); + PrintDILineInfo(dli); + } + } + } else { + DILineInfo dli = dictx->getLineInfoForAddress(Address, SpecFlags); + PrintDILineInfo(dli); + } } } diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index b431c7638d..13ea4e3295 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -94,6 +94,12 @@ static cl::alias SectionHeadersShorter("h", cl::desc("Alias for --section-headers"), cl::aliasopt(SectionHeaders)); +static cl::list<std::string> +MAttrs("mattr", + cl::CommaSeparated, + cl::desc("Target specific attributes"), + cl::value_desc("a1,+a2,-a3,...")); + static StringRef ToolName; static bool error(error_code ec) { @@ -169,6 +175,15 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (!TheTarget) return; + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + if (MAttrs.size()) { + SubtargetFeatures Features; + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + FeaturesStr = Features.getString(); + } + error_code ec; for (section_iterator i = Obj->begin_sections(), e = Obj->end_sections(); @@ -233,7 +248,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } OwningPtr<const MCSubtargetInfo> STI( - TheTarget->createMCSubtargetInfo(TripleName, "", "")); + TheTarget->createMCSubtargetInfo(TripleName, "", FeaturesStr)); if (!STI) { errs() << "error: no subtarget info for target " << TripleName << "\n"; diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index e9e39da50b..6c4bf49485 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -311,13 +311,16 @@ bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg) { // generate object file bool genResult = false; tool_output_file objFile(uniqueObjPath.c_str(), errMsg); - if (!errMsg.empty()) + if (!errMsg.empty()) { + uniqueObjPath.eraseFromDisk(); return true; + } genResult = this->generateObjectFile(objFile.os(), errMsg); objFile.os().close(); if (objFile.os().has_error()) { objFile.os().clear_error(); + uniqueObjPath.eraseFromDisk(); return true; } @@ -344,6 +347,7 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg) { OwningPtr<MemoryBuffer> BuffPtr; if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) { errMsg = ec.message(); + sys::Path(_nativeObjectPath).eraseFromDisk(); return NULL; } _nativeObjectFile = BuffPtr.take(); diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index 
5c7d9c4106..c3f2ca14dc 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -166,7 +166,7 @@ LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
 /// bitcode.
 bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
-  return llvm::sys::IdentifyFileType((char*)mem, length)
+  return llvm::sys::IdentifyFileType((const char*)mem, length)
     == llvm::sys::Bitcode_FileType;
 }
 
@@ -317,7 +317,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
 
 /// makeBuffer - Create a MemoryBuffer from a memory range.
 MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
-  const char *startPtr = (char*)mem;
+  const char *startPtr = (const char*)mem;
   return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false);
 }
 
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 4ada7d1e76..7ecd25c6b7 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -513,10 +513,6 @@ int main(int argc, char **argv) {
     return 1;
   }
 
-  // Allocate a full target machine description only if necessary.
-  // FIXME: The choice of target should be controllable on the command line.
-  std::auto_ptr<TargetMachine> target;
-
   SMDiagnostic Err;
 
   // Load the input module...
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index d272b09088..cb9a9092b5 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -2,7 +2,7 @@ set(LLVM_LINK_COMPONENTS
   Support
   )
 
-add_llvm_unittest(ADTTests
+set(ADTSources
   APFloatTest.cpp
   APIntTest.cpp
   BitVectorTest.cpp
@@ -31,3 +31,16 @@ add_llvm_unittest(ADTTests
   TwineTest.cpp
   VariadicFunctionTest.cpp
   )
+
+# These cannot be compiled with MSVC9 due to a compiler bug.
+if(MSVC AND MSVC_VERSION LESS 1600)
+  set(LLVM_OPTIONAL_SOURCES
+    DenseMapTest.cpp
+    SmallVectorTest.cpp
+    )
+  list(REMOVE_ITEM ADTSources ${LLVM_OPTIONAL_SOURCES})
+endif()
+
+add_llvm_unittest(ADTTests
+  ${ADTSources}
+  )
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index 33f04ce647..0805504ad1 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -45,6 +45,10 @@ static cl::opt<bool>
 NoCanonicalizeWhiteSpace("strict-whitespace",
               cl::desc("Do not treat all horizontal whitespace as equivalent"));
 
+static cl::opt<bool>
+NoRegex("exact-match",
+        cl::desc("Look for exact matches without using regular expressions"));
+
 //===----------------------------------------------------------------------===//
 // Pattern Handling Code.
 //===----------------------------------------------------------------------===//
@@ -124,7 +128,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
   }
 
   // Check to see if this is a fixed string, or if it has regex pieces.
-  if (PatternStr.size() < 2 ||
+  if (PatternStr.size() < 2 || NoRegex ||
      (PatternStr.find("{{") == StringRef::npos &&
       PatternStr.find("[[") == StringRef::npos)) {
     FixedStr = PatternStr;
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index abcec8fe94..78eb641899 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -416,7 +416,7 @@ struct MatchableInfo {
   SmallVector<SubtargetFeatureInfo*, 4> RequiredFeatures;
 
   /// ConversionFnKind - The enum value which is passed to the generated
-  /// ConvertToMCInst to convert parsed operands into an MCInst for this
+  /// convertToMCInst to convert parsed operands into an MCInst for this
   /// function.
std::string ConversionFnKind; @@ -488,6 +488,15 @@ struct MatchableInfo { return false; } + // Give matches that require more features higher precedence. This is useful + // because we cannot define AssemblerPredicates with the negation of + // processor features. For example, ARM v6 "nop" may be either a HINT or + // MOV. With v6, we want to match HINT. The assembler has no way to + // predicate MOV under "NoV6", but HINT will always match first because it + // requires V6 while MOV does not. + if (RequiredFeatures.size() != RHS.RequiredFeatures.size()) + return RequiredFeatures.size() > RHS.RequiredFeatures.size(); + return false; } @@ -666,7 +675,7 @@ void MatchableInfo::dump() { } static std::pair<StringRef, StringRef> -parseTwoOperandConstraint(StringRef S, SMLoc Loc) { +parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) { // Split via the '='. std::pair<StringRef, StringRef> Ops = S.split('='); if (Ops.second == "") @@ -1638,34 +1647,90 @@ void MatchableInfo::buildAliasResultOperands() { } } +static unsigned getConverterOperandID(const std::string &Name, + SetVector<std::string> &Table, + bool &IsNew) { + IsNew = Table.insert(Name); + + unsigned ID = IsNew ? Table.size() - 1 : + std::find(Table.begin(), Table.end(), Name) - Table.begin(); + + assert(ID < Table.size()); + + return ID; +} + + static void emitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName, std::vector<MatchableInfo*> &Infos, raw_ostream &OS) { + SetVector<std::string> OperandConversionKinds; + SetVector<std::string> InstructionConversionKinds; + std::vector<std::vector<uint8_t> > ConversionTable; + size_t MaxRowLength = 2; // minimum is custom converter plus terminator. + + // TargetOperandClass - This is the target's operand class, like X86Operand. + std::string TargetOperandClass = Target.getName() + "Operand"; + // Write the convert function to a separate stream, so we can drop it after - // the enum. + // the enum. We'll build up the conversion handlers for the individual + // operand types opportunistically as we encounter them. std::string ConvertFnBody; raw_string_ostream CvtOS(ConvertFnBody); - - // Function we have already generated. - std::set<std::string> GeneratedFns; - // Start the unified conversion function. - CvtOS << "bool " << Target.getName() << ClassName << "::\n"; - CvtOS << "ConvertToMCInst(unsigned Kind, MCInst &Inst, " + CvtOS << "void " << Target.getName() << ClassName << "::\n" + << "convertToMCInst(unsigned Kind, MCInst &Inst, " << "unsigned Opcode,\n" - << " const SmallVectorImpl<MCParsedAsmOperand*" - << "> &Operands) {\n"; - CvtOS << " Inst.setOpcode(Opcode);\n"; - CvtOS << " switch (Kind) {\n"; - CvtOS << " default:\n"; - - // Start the enum, which we will generate inline. - - OS << "// Unified function for converting operands to MCInst instances.\n\n"; - OS << "enum ConversionKind {\n"; - - // TargetOperandClass - This is the target's operand class, like X86Operand. 
- std::string TargetOperandClass = Target.getName() + "Operand"; + << " const SmallVectorImpl<MCParsedAsmOperand*" + << "> &Operands) {\n" + << " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n" + << " uint8_t *Converter = ConversionTable[Kind];\n" + << " Inst.setOpcode(Opcode);\n" + << " for (uint8_t *p = Converter; *p; p+= 2) {\n" + << " switch (*p) {\n" + << " default: llvm_unreachable(\"invalid conversion entry!\");\n" + << " case CVT_Reg:\n" + << " static_cast<" << TargetOperandClass + << "*>(Operands[*(p + 1)])->addRegOperands(Inst, 1);\n" + << " break;\n" + << " case CVT_Tied:\n" + << " Inst.addOperand(Inst.getOperand(*(p + 1)));\n" + << " break;\n"; + + std::string OperandFnBody; + raw_string_ostream OpOS(OperandFnBody); + // Start the operand number lookup function. + OpOS << "unsigned " << Target.getName() << ClassName << "::\n" + << "getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n" + << " const SmallVectorImpl<MCParsedAsmOperand*> " + << "&Operands,\n unsigned OperandNum, unsigned " + << "&NumMCOperands) {\n" + << " assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n" + << " NumMCOperands = 0;\n" + << " unsigned MCOperandNum = 0;\n" + << " uint8_t *Converter = ConversionTable[Kind];\n" + << " for (uint8_t *p = Converter; *p; p+= 2) {\n" + << " if (*(p + 1) > OperandNum) continue;\n" + << " switch (*p) {\n" + << " default: llvm_unreachable(\"invalid conversion entry!\");\n" + << " case CVT_Reg:\n" + << " if (*(p + 1) == OperandNum) {\n" + << " NumMCOperands = 1;\n" + << " break;\n" + << " }\n" + << " ++MCOperandNum;\n" + << " break;\n" + << " case CVT_Tied:\n" + << " // FIXME: Tied operand calculation not supported.\n" + << " assert (0 && \"getMCInstOperandNumImpl() doesn't support tied operands, yet!\");\n" + << " break;\n"; + + // Pre-populate the operand conversion kinds with the standard always + // available entries. + OperandConversionKinds.insert("CVT_Done"); + OperandConversionKinds.insert("CVT_Reg"); + OperandConversionKinds.insert("CVT_Tied"); + enum { CVT_Done, CVT_Reg, CVT_Tied }; for (std::vector<MatchableInfo*>::const_iterator it = Infos.begin(), ie = Infos.end(); it != ie; ++it) { @@ -1679,24 +1744,35 @@ static void emitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName, II.ConversionFnKind = Signature; // Check if we have already generated this signature. - if (!GeneratedFns.insert(Signature).second) + if (!InstructionConversionKinds.insert(Signature)) continue; - // If not, emit it now. Add to the enum list. - OS << " " << Signature << ",\n"; + // Remember this converter for the kind enum. + unsigned KindID = OperandConversionKinds.size(); + OperandConversionKinds.insert("CVT_" + AsmMatchConverter); - CvtOS << " case " << Signature << ":\n"; - CvtOS << " return " << AsmMatchConverter - << "(Inst, Opcode, Operands);\n"; + // Add the converter row for this instruction. + ConversionTable.push_back(std::vector<uint8_t>()); + ConversionTable.back().push_back(KindID); + ConversionTable.back().push_back(CVT_Done); + + // Add the handler to the conversion driver function. + CvtOS << " case CVT_" << AsmMatchConverter << ":\n" + << " " << AsmMatchConverter << "(Inst, Operands);\n" + << " break;\n"; + + // FIXME: Handle the operand number lookup for custom match functions. continue; } // Build the conversion function signature. std::string Signature = "Convert"; - std::string CaseBody; - raw_string_ostream CaseOS(CaseBody); + + std::vector<uint8_t> ConversionRow; // Compute the convert enum and the case body. 
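+    // For instance (illustrative), an instruction whose sole operand is a
+    // register rendered from asm operand 1 yields the signature
+    // "Convert__Reg1_1"; a tied copy of MC operand 0 would append "__Tie0",
+    // and a literal immediate 42 would append "__imm_42".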
+    MaxRowLength = std::max(MaxRowLength, II.ResOperands.size()*2 + 1 );
+
     for (unsigned i = 0, e = II.ResOperands.size(); i != e; ++i) {
       const MatchableInfo::ResOperand &OpInfo = II.ResOperands[i];
 
@@ -1709,74 +1785,186 @@ static void emitConvertToMCInst(CodeGenTarget &Target, StringRef ClassName,
         // Registers are always converted the same, don't duplicate the
         // conversion function based on them.
         Signature += "__";
-        if (Op.Class->isRegisterClass())
-          Signature += "Reg";
-        else
-          Signature += Op.Class->ClassName;
+        std::string Class;
+        Class = Op.Class->isRegisterClass() ? "Reg" : Op.Class->ClassName;
+        Signature += Class;
         Signature += utostr(OpInfo.MINumOperands);
         Signature += "_" + itostr(OpInfo.AsmOperandNum);
 
-        CaseOS << "    ((" << TargetOperandClass << "*)Operands["
-               << (OpInfo.AsmOperandNum+1) << "])->" << Op.Class->RenderMethod
-               << "(Inst, " << OpInfo.MINumOperands << ");\n";
+        // Add the conversion kind, if necessary, and get the associated ID
+        // (the index of its entry in the vector).
+        std::string Name = "CVT_" + (Op.Class->isRegisterClass() ? "Reg" :
+                                     Op.Class->RenderMethod);
+
+        bool IsNewConverter = false;
+        unsigned ID = getConverterOperandID(Name, OperandConversionKinds,
+                                            IsNewConverter);
+
+        // Add the operand entry to the instruction kind conversion row.
+        ConversionRow.push_back(ID);
+        ConversionRow.push_back(OpInfo.AsmOperandNum + 1);
+
+        if (!IsNewConverter)
+          break;
+
+        // This is a new operand kind. Add a handler for it to the
+        // converter driver.
+        CvtOS << "    case " << Name << ":\n"
+              << "      static_cast<" << TargetOperandClass
+              << "*>(Operands[*(p + 1)])->"
+              << Op.Class->RenderMethod << "(Inst, " << OpInfo.MINumOperands
+              << ");\n"
+              << "      break;\n";
+
+        // Add a handler for the operand number lookup.
+        OpOS << "    case " << Name << ":\n"
+             << "      if (*(p + 1) == OperandNum) {\n"
+             << "        NumMCOperands = " << OpInfo.MINumOperands << ";\n"
+             << "        break;\n"
+             << "      }\n"
+             << "      MCOperandNum += " << OpInfo.MINumOperands << ";\n"
+             << "      break;\n";
        break;
      }
-
      case MatchableInfo::ResOperand::TiedOperand: {
        // If this operand is tied to a previous one, just copy the MCInst
        // operand from the earlier one. We can only tie single MCOperand values.
        //assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
        unsigned TiedOp = OpInfo.TiedOperandNum;
        assert(i > TiedOp && "Tied operand precedes its target!");
-        CaseOS << "    Inst.addOperand(Inst.getOperand(" << TiedOp << "));\n";
        Signature += "__Tie" + utostr(TiedOp);
+        ConversionRow.push_back(CVT_Tied);
+        ConversionRow.push_back(TiedOp);
+        // FIXME: Handle the operand number lookup for tied operands.
        break;
      }
      case MatchableInfo::ResOperand::ImmOperand: {
        int64_t Val = OpInfo.ImmVal;
-        CaseOS << "    Inst.addOperand(MCOperand::CreateImm(" << Val << "));\n";
-        Signature += "__imm" + itostr(Val);
+        std::string Ty = "imm_" + itostr(Val);
+        Signature += "__" + Ty;
+
+        std::string Name = "CVT_" + Ty;
+        bool IsNewConverter = false;
+        unsigned ID = getConverterOperandID(Name, OperandConversionKinds,
+                                            IsNewConverter);
+        // Add the operand entry to the instruction kind conversion row.
+ ConversionRow.push_back(ID); + ConversionRow.push_back(0); + + if (!IsNewConverter) + break; + + CvtOS << " case " << Name << ":\n" + << " Inst.addOperand(MCOperand::CreateImm(" << Val << "));\n" + << " break;\n"; + + OpOS << " case " << Name << ":\n" + << " if (*(p + 1) == OperandNum) {\n" + << " NumMCOperands = 1;\n" + << " break;\n" + << " }\n" + << " ++MCOperandNum;\n" + << " break;\n"; break; } case MatchableInfo::ResOperand::RegOperand: { + std::string Reg, Name; if (OpInfo.Register == 0) { - CaseOS << " Inst.addOperand(MCOperand::CreateReg(0));\n"; - Signature += "__reg0"; + Name = "reg0"; + Reg = "0"; } else { - std::string N = getQualifiedName(OpInfo.Register); - CaseOS << " Inst.addOperand(MCOperand::CreateReg(" << N << "));\n"; - Signature += "__reg" + OpInfo.Register->getName(); + Reg = getQualifiedName(OpInfo.Register); + Name = "reg" + OpInfo.Register->getName(); } + Signature += "__" + Name; + Name = "CVT_" + Name; + bool IsNewConverter = false; + unsigned ID = getConverterOperandID(Name, OperandConversionKinds, + IsNewConverter); + // Add the operand entry to the instruction kind conversion row. + ConversionRow.push_back(ID); + ConversionRow.push_back(0); + + if (!IsNewConverter) + break; + CvtOS << " case " << Name << ":\n" + << " Inst.addOperand(MCOperand::CreateReg(" << Reg << "));\n" + << " break;\n"; + + OpOS << " case " << Name << ":\n" + << " if (*(p + 1) == OperandNum) {\n" + << " NumMCOperands = 1;\n" + << " break;\n" + << " }\n" + << " ++MCOperandNum;\n" + << " break;\n"; } } } + // If there were no operands, add to the signature to that effect + if (Signature == "Convert") + Signature += "_NoOperands"; + II.ConversionFnKind = Signature; - // Check if we have already generated this signature. - if (!GeneratedFns.insert(Signature).second) + // Save the signature. If we already have it, don't add a new row + // to the table. + if (!InstructionConversionKinds.insert(Signature)) continue; - // If not, emit it now. Add to the enum list. - OS << " " << Signature << ",\n"; - - CvtOS << " case " << Signature << ":\n"; - CvtOS << CaseOS.str(); - CvtOS << " return true;\n"; + // Add the row to the table. + ConversionTable.push_back(ConversionRow); } - // Finish the convert function. + // Finish up the converter driver function. + CvtOS << " }\n }\n}\n\n"; + + // Finish up the operand number lookup function. + OpOS << " }\n }\n return MCOperandNum;\n}\n\n"; + + OS << "namespace {\n"; + + // Output the operand conversion kind enum. + OS << "enum OperatorConversionKind {\n"; + for (unsigned i = 0, e = OperandConversionKinds.size(); i != e; ++i) + OS << " " << OperandConversionKinds[i] << ",\n"; + OS << " CVT_NUM_CONVERTERS\n"; + OS << "};\n\n"; + + // Output the instruction conversion kind enum. + OS << "enum InstructionConversionKind {\n"; + for (SetVector<std::string>::const_iterator + i = InstructionConversionKinds.begin(), + e = InstructionConversionKinds.end(); i != e; ++i) + OS << " " << *i << ",\n"; + OS << " CVT_NUM_SIGNATURES\n"; + OS << "};\n\n"; + - CvtOS << " }\n"; - CvtOS << " return false;\n"; - CvtOS << "}\n\n"; + OS << "} // end anonymous namespace\n\n"; - // Finish the enum, and drop the convert function after it. + // Output the conversion table. 
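+  // As a sketch (illustrative names, not from a real target), the emitted
+  // table looks like:
+  //   static uint8_t ConversionTable[CVT_NUM_SIGNATURES][5] = {
+  //     // Convert__Reg1_1__imm_42
+  //     { CVT_Reg, 1, CVT_imm_42, 0, CVT_Done },
+  //   };
+  // convertToMCInst() walks a row two bytes at a time: the first byte of each
+  // pair selects the operand converter, the second is the asm operand index
+  // (or 0 where none applies), and the terminating CVT_Done ends the row.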
+ OS << "static uint8_t ConversionTable[CVT_NUM_SIGNATURES][" + << MaxRowLength << "] = {\n"; + + for (unsigned Row = 0, ERow = ConversionTable.size(); Row != ERow; ++Row) { + assert(ConversionTable[Row].size() % 2 == 0 && "bad conversion row!"); + OS << " // " << InstructionConversionKinds[Row] << "\n"; + OS << " { "; + for (unsigned i = 0, e = ConversionTable[Row].size(); i != e; i += 2) + OS << OperandConversionKinds[ConversionTable[Row][i]] << ", " + << (unsigned)(ConversionTable[Row][i + 1]) << ", "; + OS << "CVT_Done },\n"; + } - OS << " NumConversionVariants\n"; OS << "};\n\n"; + // Spit out the conversion driver function. OS << CvtOS.str(); + + // Spit out the operand number lookup function. + OS << OpOS.str(); } /// emitMatchClassEnumeration - Emit the enumeration for match class kinds. @@ -2407,14 +2595,19 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " // This should be included into the middle of the declaration of\n"; OS << " // your subclasses implementation of MCTargetAsmParser.\n"; OS << " unsigned ComputeAvailableFeatures(uint64_t FeatureBits) const;\n"; - OS << " bool ConvertToMCInst(unsigned Kind, MCInst &Inst, " + OS << " void convertToMCInst(unsigned Kind, MCInst &Inst, " << "unsigned Opcode,\n" - << " const SmallVectorImpl<MCParsedAsmOperand*> " + << " const SmallVectorImpl<MCParsedAsmOperand*> " << "&Operands);\n"; + OS << " unsigned getMCInstOperandNumImpl(unsigned Kind, MCInst &Inst,\n " + << " const " + << "SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n " + << " unsigned OperandNum, unsigned &NumMCOperands);\n"; OS << " bool MnemonicIsValid(StringRef Mnemonic);\n"; - OS << " unsigned MatchInstructionImpl(\n"; - OS << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n"; - OS << " MCInst &Inst, unsigned &ErrorInfo, unsigned VariantID = 0);\n"; + OS << " unsigned MatchInstructionImpl(\n" + << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n" + << " unsigned &Kind, MCInst &Inst, " + << "unsigned &ErrorInfo,\n unsigned VariantID = 0);\n"; if (Info.OperandMatchInfo.size()) { OS << "\n enum OperandMatchResultTy {\n"; @@ -2594,8 +2787,14 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { << Target.getName() << ClassName << "::\n" << "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>" << " &Operands,\n"; - OS << " MCInst &Inst, unsigned &ErrorInfo, "; - OS << "unsigned VariantID) {\n"; + OS << " unsigned &Kind, MCInst &Inst, unsigned "; + OS << "&ErrorInfo,\n unsigned VariantID) {\n"; + + OS << " // Eliminate obvious mismatches.\n"; + OS << " if (Operands.size() > " << (MaxNumOperands+1) << ") {\n"; + OS << " ErrorInfo = " << (MaxNumOperands+1) << ";\n"; + OS << " return Match_InvalidOperand;\n"; + OS << " }\n\n"; // Emit code to get the available features. OS << " // Get the current feature set.\n"; @@ -2613,12 +2812,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { } // Emit code to compute the class list for this operand vector. 
- OS << " // Eliminate obvious mismatches.\n"; - OS << " if (Operands.size() > " << (MaxNumOperands+1) << ") {\n"; - OS << " ErrorInfo = " << (MaxNumOperands+1) << ";\n"; - OS << " return Match_InvalidOperand;\n"; - OS << " }\n\n"; - OS << " // Some state to try to produce better error messages.\n"; OS << " bool HadMatchOtherThanFeatures = false;\n"; OS << " bool HadMatchOtherThanPredicate = false;\n"; @@ -2683,17 +2876,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " HadMatchOtherThanFeatures = true;\n"; OS << " unsigned NewMissingFeatures = it->RequiredFeatures & " "~AvailableFeatures;\n"; - OS << " if (CountPopulation_32(NewMissingFeatures) <= " - "CountPopulation_32(MissingFeatures))\n"; + OS << " if (CountPopulation_32(NewMissingFeatures) <=\n" + " CountPopulation_32(MissingFeatures))\n"; OS << " MissingFeatures = NewMissingFeatures;\n"; OS << " continue;\n"; OS << " }\n"; OS << "\n"; OS << " // We have selected a definite instruction, convert the parsed\n" << " // operands into the appropriate MCInst.\n"; - OS << " if (!ConvertToMCInst(it->ConvertFn, Inst,\n" - << " it->Opcode, Operands))\n"; - OS << " return Match_ConversionFail;\n"; + OS << " convertToMCInst(it->ConvertFn, Inst, it->Opcode, Operands);\n"; OS << "\n"; // Verify the instruction with the target-specific match predicate function. @@ -2714,6 +2905,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (!InsnCleanupFn.empty()) OS << " " << InsnCleanupFn << "(Inst);\n"; + OS << " Kind = it->ConvertFn;\n"; OS << " return Match_Success;\n"; OS << " }\n\n"; diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp index 31a39b1f04..9c8ad67b42 100644 --- a/utils/TableGen/CodeEmitterGen.cpp +++ b/utils/TableGen/CodeEmitterGen.cpp @@ -92,7 +92,7 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) { int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI, int bit) { if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit))) { - if (VarInit *VI = dynamic_cast<VarInit*>(VBI->getVariable())) + if (VarInit *VI = dynamic_cast<VarInit*>(VBI->getBitVar())) if (VI->getName() == VarName) return VBI->getBitNum(); } else if (VarInit *VI = dynamic_cast<VarInit*>(BI->getBit(bit))) { diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 34f8a34e7a..8713a56916 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1410,19 +1410,13 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { // Make sure that the value is representable for this type. if (Size >= 32) return MadeChange; - int Val = (II->getValue() << (32-Size)) >> (32-Size); - if (Val == II->getValue()) return MadeChange; - - // If sign-extended doesn't fit, does it fit as unsigned? - unsigned ValueMask; - unsigned UnsignedVal; - ValueMask = unsigned(~uint32_t(0UL) >> (32-Size)); - UnsignedVal = unsigned(II->getValue()); - - if ((ValueMask & UnsignedVal) == UnsignedVal) + // Check that the value doesn't use more bits than we have. It must either + // be a sign- or zero-extended equivalent of the original. 
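+    // Worked example (illustrative): for Size == 8 the shift below is by 7,
+    // so 255 >> 7 == 1 (a valid zero-extension), -128 >> 7 == -1 (a valid
+    // sign-extension), but 256 >> 7 == 2 and is rejected.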
+    int64_t SignBitAndAbove = II->getValue() >> (Size - 1);
+    if (SignBitAndAbove == -1 || SignBitAndAbove == 0 || SignBitAndAbove == 1)
       return MadeChange;
 
-    TP.error("Integer value '" + itostr(II->getValue())+
+    TP.error("Integer value '" + itostr(II->getValue()) +
              "' is out of range for type '" + getEnumName(getType(0)) + "'!");
     return MadeChange;
   }
@@ -1581,8 +1575,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
     // If the instruction expects a predicate or optional def operand, we
     // codegen this by setting the operand to its default value if it has a
     // non-empty DefaultOps field.
-    if ((OperandNode->isSubClassOf("PredicateOperand") ||
-         OperandNode->isSubClassOf("OptionalDefOperand")) &&
+    if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
        !CDP.getDefaultOperand(OperandNode).DefaultOps.empty())
      continue;
 
@@ -2033,6 +2026,9 @@ CodeGenDAGPatterns::CodeGenDAGPatterns(RecordKeeper &R) :
   // stores, and side effects in many cases by examining an
   // instruction's pattern.
   InferInstructionFlags();
+
+  // Verify that instruction flags match the patterns.
+  VerifyInstructionFlags();
 }
 
 CodeGenDAGPatterns::~CodeGenDAGPatterns() {
@@ -2176,53 +2172,46 @@ void CodeGenDAGPatterns::ParsePatternFragments() {
 }
 
 void CodeGenDAGPatterns::ParseDefaultOperands() {
-  std::vector<Record*> DefaultOps[2];
-  DefaultOps[0] = Records.getAllDerivedDefinitions("PredicateOperand");
-  DefaultOps[1] = Records.getAllDerivedDefinitions("OptionalDefOperand");
+  std::vector<Record*> DefaultOps;
+  DefaultOps = Records.getAllDerivedDefinitions("OperandWithDefaultOps");
 
   // Find some SDNode.
   assert(!SDNodes.empty() && "No SDNodes parsed?");
   Init *SomeSDNode = DefInit::get(SDNodes.begin()->first);
 
-  for (unsigned iter = 0; iter != 2; ++iter) {
-    for (unsigned i = 0, e = DefaultOps[iter].size(); i != e; ++i) {
-      DagInit *DefaultInfo = DefaultOps[iter][i]->getValueAsDag("DefaultOps");
-
-      // Clone the DefaultInfo dag node, changing the operator from 'ops' to
-      // SomeSDnode so that we can parse this.
-      std::vector<std::pair<Init*, std::string> > Ops;
-      for (unsigned op = 0, e = DefaultInfo->getNumArgs(); op != e; ++op)
-        Ops.push_back(std::make_pair(DefaultInfo->getArg(op),
-                                     DefaultInfo->getArgName(op)));
-      DagInit *DI = DagInit::get(SomeSDNode, "", Ops);
-
-      // Create a TreePattern to parse this.
-      TreePattern P(DefaultOps[iter][i], DI, false, *this);
-      assert(P.getNumTrees() == 1 && "This ctor can only produce one tree!");
-
-      // Copy the operands over into a DAGDefaultOperand.
-      DAGDefaultOperand DefaultOpInfo;
-
-      TreePatternNode *T = P.getTree(0);
-      for (unsigned op = 0, e = T->getNumChildren(); op != e; ++op) {
-        TreePatternNode *TPN = T->getChild(op);
-        while (TPN->ApplyTypeConstraints(P, false))
-          /* Resolve all types */;
-
-        if (TPN->ContainsUnresolvedType()) {
-          if (iter == 0)
-            throw "Value #" + utostr(i) + " of PredicateOperand '" +
-              DefaultOps[iter][i]->getName() +"' doesn't have a concrete type!";
-          else
-            throw "Value #" + utostr(i) + " of OptionalDefOperand '" +
-              DefaultOps[iter][i]->getName() +"' doesn't have a concrete type!";
-        }
-        DefaultOpInfo.DefaultOps.push_back(TPN);
+  for (unsigned i = 0, e = DefaultOps.size(); i != e; ++i) {
+    DagInit *DefaultInfo = DefaultOps[i]->getValueAsDag("DefaultOps");
+
+    // Clone the DefaultInfo dag node, changing the operator from 'ops' to
+    // SomeSDNode so that we can parse this.
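+    // Illustratively, an OperandWithDefaultOps record may carry something
+    // like (ops (i32 14), (i32 zero_reg)) in its DefaultOps field (the ARM
+    // predicate operand has this shape); only the 'ops' operator is swapped
+    // out here, the arguments are kept as-is.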
+ std::vector<std::pair<Init*, std::string> > Ops; + for (unsigned op = 0, e = DefaultInfo->getNumArgs(); op != e; ++op) + Ops.push_back(std::make_pair(DefaultInfo->getArg(op), + DefaultInfo->getArgName(op))); + DagInit *DI = DagInit::get(SomeSDNode, "", Ops); + + // Create a TreePattern to parse this. + TreePattern P(DefaultOps[i], DI, false, *this); + assert(P.getNumTrees() == 1 && "This ctor can only produce one tree!"); + + // Copy the operands over into a DAGDefaultOperand. + DAGDefaultOperand DefaultOpInfo; + + TreePatternNode *T = P.getTree(0); + for (unsigned op = 0, e = T->getNumChildren(); op != e; ++op) { + TreePatternNode *TPN = T->getChild(op); + while (TPN->ApplyTypeConstraints(P, false)) + /* Resolve all types */; + + if (TPN->ContainsUnresolvedType()) { + throw "Value #" + utostr(i) + " of OperandWithDefaultOps '" + + DefaultOps[i]->getName() +"' doesn't have a concrete type!"; } - - // Insert it into the DefaultOperands map so we can find it later. - DefaultOperands[DefaultOps[iter][i]] = DefaultOpInfo; + DefaultOpInfo.DefaultOps.push_back(TPN); } + + // Insert it into the DefaultOperands map so we can find it later. + DefaultOperands[DefaultOps[i]] = DefaultOpInfo; } } @@ -2367,36 +2356,29 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, class InstAnalyzer { const CodeGenDAGPatterns &CDP; - bool &mayStore; - bool &mayLoad; - bool &IsBitcast; - bool &HasSideEffects; - bool &IsVariadic; public: - InstAnalyzer(const CodeGenDAGPatterns &cdp, - bool &maystore, bool &mayload, bool &isbc, bool &hse, bool &isv) - : CDP(cdp), mayStore(maystore), mayLoad(mayload), IsBitcast(isbc), - HasSideEffects(hse), IsVariadic(isv) { - } + bool hasSideEffects; + bool mayStore; + bool mayLoad; + bool isBitcast; + bool isVariadic; - /// Analyze - Analyze the specified instruction, returning true if the - /// instruction had a pattern. - bool Analyze(Record *InstRecord) { - const TreePattern *Pattern = CDP.getInstruction(InstRecord).getPattern(); - if (Pattern == 0) { - HasSideEffects = 1; - return false; // No pattern. - } + InstAnalyzer(const CodeGenDAGPatterns &cdp) + : CDP(cdp), hasSideEffects(false), mayStore(false), mayLoad(false), + isBitcast(false), isVariadic(false) {} - // FIXME: Assume only the first tree is the pattern. The others are clobber - // nodes. - AnalyzeNode(Pattern->getTree(0)); - return true; + void Analyze(const TreePattern *Pat) { + // Assume only the first tree is the pattern. The others are clobber nodes. + AnalyzeNode(Pat->getTree(0)); + } + + void Analyze(const PatternToMatch *Pat) { + AnalyzeNode(Pat->getSrcPattern()); } private: bool IsNodeBitcast(const TreePatternNode *N) const { - if (HasSideEffects || mayLoad || mayStore || IsVariadic) + if (hasSideEffects || mayLoad || mayStore || isVariadic) return false; if (N->getNumChildren() != 2) @@ -2418,6 +2400,7 @@ private: return OpInfo.getEnumName() == "ISD::BITCAST"; } +public: void AnalyzeNode(const TreePatternNode *N) { if (N->isLeaf()) { if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) { @@ -2427,7 +2410,7 @@ private: const ComplexPattern &CP = CDP.getComplexPattern(LeafRec); if (CP.hasProperty(SDNPMayStore)) mayStore = true; if (CP.hasProperty(SDNPMayLoad)) mayLoad = true; - if (CP.hasProperty(SDNPSideEffect)) HasSideEffects = true; + if (CP.hasProperty(SDNPSideEffect)) hasSideEffects = true; } } return; @@ -2439,7 +2422,7 @@ private: // Ignore set nodes, which are not SDNodes. 
     if (N->getOperator()->getName() == "set") {
-      IsBitcast = IsNodeBitcast(N);
+      isBitcast = IsNodeBitcast(N);
       return;
     }
 
@@ -2449,8 +2432,8 @@ private:
     // Notice properties of the node.
     if (OpInfo.hasProperty(SDNPMayStore)) mayStore = true;
     if (OpInfo.hasProperty(SDNPMayLoad)) mayLoad = true;
-    if (OpInfo.hasProperty(SDNPSideEffect)) HasSideEffects = true;
-    if (OpInfo.hasProperty(SDNPVariadic)) IsVariadic = true;
+    if (OpInfo.hasProperty(SDNPSideEffect)) hasSideEffects = true;
+    if (OpInfo.hasProperty(SDNPVariadic)) isVariadic = true;
 
     if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
       // If this is an intrinsic, analyze it.
@@ -2462,62 +2445,64 @@ private:
 
       if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
         // WriteMem intrinsics can have other strange effects.
-        HasSideEffects = true;
+        hasSideEffects = true;
     }
   }
 
 };
 
-static void InferFromPattern(const CodeGenInstruction &Inst,
-                             bool &MayStore, bool &MayLoad,
-                             bool &IsBitcast,
-                             bool &HasSideEffects, bool &IsVariadic,
-                             const CodeGenDAGPatterns &CDP) {
-  MayStore = MayLoad = IsBitcast = HasSideEffects = IsVariadic = false;
-
-  bool HadPattern =
-    InstAnalyzer(CDP, MayStore, MayLoad, IsBitcast, HasSideEffects, IsVariadic)
-    .Analyze(Inst.TheDef);
-
-  // InstAnalyzer only correctly analyzes mayStore/mayLoad so far.
-  if (Inst.mayStore) {  // If the .td file explicitly sets mayStore, use it.
-    // If we decided that this is a store from the pattern, then the .td file
-    // entry is redundant.
-    if (MayStore)
-      PrintWarning(Inst.TheDef->getLoc(),
-                   "mayStore flag explicitly set on "
-                   "instruction, but flag already inferred from pattern.");
-    MayStore = true;
+static bool InferFromPattern(CodeGenInstruction &InstInfo,
+                             const InstAnalyzer &PatInfo,
+                             Record *PatDef) {
+  bool Error = false;
+
+  // Remember where InstInfo got its flags.
+  if (InstInfo.hasUndefFlags())
+    InstInfo.InferredFrom = PatDef;
+
+  // Check explicitly set flags for consistency.
+  if (InstInfo.hasSideEffects != PatInfo.hasSideEffects &&
+      !InstInfo.hasSideEffects_Unset) {
+    // Allow explicitly setting hasSideEffects = 1 on instructions, even when
+    // the pattern has no side effects. That could be useful for div/rem
+    // instructions that may trap.
+    if (!InstInfo.hasSideEffects) {
+      Error = true;
+      PrintError(PatDef->getLoc(), "Pattern doesn't match hasSideEffects = " +
+                 Twine(InstInfo.hasSideEffects));
+    }
   }
 
-  if (Inst.mayLoad) {  // If the .td file explicitly sets mayLoad, use it.
-    // If we decided that this is a load from the pattern, then the .td file
-    // entry is redundant.
-    if (MayLoad)
-      PrintWarning(Inst.TheDef->getLoc(),
-                   "mayLoad flag explicitly set on "
-                   "instruction, but flag already inferred from pattern.");
-    MayLoad = true;
+  if (InstInfo.mayStore != PatInfo.mayStore && !InstInfo.mayStore_Unset) {
+    Error = true;
+    PrintError(PatDef->getLoc(), "Pattern doesn't match mayStore = " +
+               Twine(InstInfo.mayStore));
   }
 
-  if (Inst.neverHasSideEffects) {
-    if (HadPattern)
-      PrintWarning(Inst.TheDef->getLoc(),
-                   "neverHasSideEffects flag explicitly set on "
-                   "instruction, but flag already inferred from pattern.");
-    HasSideEffects = false;
+  if (InstInfo.mayLoad != PatInfo.mayLoad && !InstInfo.mayLoad_Unset) {
+    // Allow explicitly setting mayLoad = 1, even when the pattern has no loads.
+    // Some targets translate immediates to loads.
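+    // A typical case is a floating-point immediate materialized through a
+    // constant-pool load: mayLoad = 1 is then legitimate even though the
+    // ISel pattern itself contains no load node.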
+ if (!InstInfo.mayLoad) { + Error = true; + PrintError(PatDef->getLoc(), "Pattern doesn't match mayLoad = " + + Twine(InstInfo.mayLoad)); + } } - if (Inst.hasSideEffects) { - if (HasSideEffects) - PrintWarning(Inst.TheDef->getLoc(), - "hasSideEffects flag explicitly set on " - "instruction, but flag already inferred from pattern."); - HasSideEffects = true; - } + // Transfer inferred flags. + InstInfo.hasSideEffects |= PatInfo.hasSideEffects; + InstInfo.mayStore |= PatInfo.mayStore; + InstInfo.mayLoad |= PatInfo.mayLoad; - if (Inst.Operands.isVariadic) - IsVariadic = true; // Can warn if we want. + // These flags are silently added without any verification. + InstInfo.isBitcast |= PatInfo.isBitcast; + + // Don't infer isVariadic. This flag means something different on SDNodes and + // instructions. For example, a CALL SDNode is variadic because it has the + // call arguments as operands, but a CALL instruction is not variadic - it + // has argument registers as implicit, not explicit uses. + + return Error; } /// hasNullFragReference - Return true if the DAG has any reference to the @@ -2551,6 +2536,17 @@ static bool hasNullFragReference(ListInit *LI) { return false; } +/// Get all the instructions in a tree. +static void +getInstructionsInTree(TreePatternNode *Tree, SmallVectorImpl<Record*> &Instrs) { + if (Tree->isLeaf()) + return; + if (Tree->getOperator()->isSubClassOf("Instruction")) + Instrs.push_back(Tree->getOperator()); + for (unsigned i = 0, e = Tree->getNumChildren(); i != e; ++i) + getInstructionsInTree(Tree->getChild(i), Instrs); +} + /// ParseInstructions - Parse all of the instructions, inlining and resolving /// any fragments involved. This populates the Instructions list with fully /// resolved instructions. @@ -2683,11 +2679,9 @@ void CodeGenDAGPatterns::ParseInstructions() { I->error("Operand #" + utostr(i) + " in operands list has no name!"); if (!InstInputsCheck.count(OpName)) { - // If this is an predicate operand or optional def operand with an - // DefaultOps set filled in, we can ignore this. When we codegen it, - // we will do so as always executed. - if (Op.Rec->isSubClassOf("PredicateOperand") || - Op.Rec->isSubClassOf("OptionalDefOperand")) { + // If this is an operand with a DefaultOps set filled in, we can ignore + // this. When we codegen it, we will do so as always executed. + if (Op.Rec->isSubClassOf("OperandWithDefaultOps")) { // Does it have a non-empty DefaultOps field? If so, ignore this // operand. if (!getDefaultOperand(Op.Rec).DefaultOps.empty()) @@ -2852,25 +2846,156 @@ void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern, void CodeGenDAGPatterns::InferInstructionFlags() { const std::vector<const CodeGenInstruction*> &Instructions = Target.getInstructionsByEnumValue(); + + // First try to infer flags from the primary instruction pattern, if any. + SmallVector<CodeGenInstruction*, 8> Revisit; + unsigned Errors = 0; for (unsigned i = 0, e = Instructions.size(); i != e; ++i) { CodeGenInstruction &InstInfo = const_cast<CodeGenInstruction &>(*Instructions[i]); - // Determine properties of the instruction from its pattern. - bool MayStore, MayLoad, IsBitcast, HasSideEffects, IsVariadic; - InferFromPattern(InstInfo, MayStore, MayLoad, IsBitcast, - HasSideEffects, IsVariadic, *this); - InstInfo.mayStore = MayStore; - InstInfo.mayLoad = MayLoad; - InstInfo.isBitcast = IsBitcast; - InstInfo.hasSideEffects = HasSideEffects; - InstInfo.Operands.isVariadic = IsVariadic; - // Sanity checks. 
- if (InstInfo.isReMaterializable && InstInfo.hasSideEffects) - throw TGError(InstInfo.TheDef->getLoc(), "The instruction " + - InstInfo.TheDef->getName() + - " is rematerializable AND has unmodeled side effects?"); + // Treat neverHasSideEffects = 1 as the equivalent of hasSideEffects = 0. + // This flag is obsolete and will be removed. + if (InstInfo.neverHasSideEffects) { + assert(!InstInfo.hasSideEffects); + InstInfo.hasSideEffects_Unset = false; + } + + // Get the primary instruction pattern. + const TreePattern *Pattern = getInstruction(InstInfo.TheDef).getPattern(); + if (!Pattern) { + if (InstInfo.hasUndefFlags()) + Revisit.push_back(&InstInfo); + continue; + } + InstAnalyzer PatInfo(*this); + PatInfo.Analyze(Pattern); + Errors += InferFromPattern(InstInfo, PatInfo, InstInfo.TheDef); + } + + // Second, look for single-instruction patterns defined outside the + // instruction. + for (ptm_iterator I = ptm_begin(), E = ptm_end(); I != E; ++I) { + const PatternToMatch &PTM = *I; + + // We can only infer from single-instruction patterns, otherwise we won't + // know which instruction should get the flags. + SmallVector<Record*, 8> PatInstrs; + getInstructionsInTree(PTM.getDstPattern(), PatInstrs); + if (PatInstrs.size() != 1) + continue; + + // Get the single instruction. + CodeGenInstruction &InstInfo = Target.getInstruction(PatInstrs.front()); + + // Only infer properties from the first pattern. We'll verify the others. + if (InstInfo.InferredFrom) + continue; + + InstAnalyzer PatInfo(*this); + PatInfo.Analyze(&PTM); + Errors += InferFromPattern(InstInfo, PatInfo, PTM.getSrcRecord()); } + + if (Errors) + throw "pattern conflicts"; + + // Revisit instructions with undefined flags and no pattern. + if (Target.guessInstructionProperties()) { + for (unsigned i = 0, e = Revisit.size(); i != e; ++i) { + CodeGenInstruction &InstInfo = *Revisit[i]; + if (InstInfo.InferredFrom) + continue; + // The mayLoad and mayStore flags default to false. + // Conservatively assume hasSideEffects if it wasn't explicit. + if (InstInfo.hasSideEffects_Unset) + InstInfo.hasSideEffects = true; + } + return; + } + + // Complain about any flags that are still undefined. + for (unsigned i = 0, e = Revisit.size(); i != e; ++i) { + CodeGenInstruction &InstInfo = *Revisit[i]; + if (InstInfo.InferredFrom) + continue; + if (InstInfo.hasSideEffects_Unset) + PrintError(InstInfo.TheDef->getLoc(), + "Can't infer hasSideEffects from patterns"); + if (InstInfo.mayStore_Unset) + PrintError(InstInfo.TheDef->getLoc(), + "Can't infer mayStore from patterns"); + if (InstInfo.mayLoad_Unset) + PrintError(InstInfo.TheDef->getLoc(), + "Can't infer mayLoad from patterns"); + } +} + + +/// Verify instruction flags against pattern node properties. +void CodeGenDAGPatterns::VerifyInstructionFlags() { + unsigned Errors = 0; + for (ptm_iterator I = ptm_begin(), E = ptm_end(); I != E; ++I) { + const PatternToMatch &PTM = *I; + SmallVector<Record*, 8> Instrs; + getInstructionsInTree(PTM.getDstPattern(), Instrs); + if (Instrs.empty()) + continue; + + // Count the number of instructions with each flag set. + unsigned NumSideEffects = 0; + unsigned NumStores = 0; + unsigned NumLoads = 0; + for (unsigned i = 0, e = Instrs.size(); i != e; ++i) { + const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]); + NumSideEffects += InstInfo.hasSideEffects; + NumStores += InstInfo.mayStore; + NumLoads += InstInfo.mayLoad; + } + + // Analyze the source pattern. 
+ InstAnalyzer PatInfo(*this);
+ PatInfo.Analyze(&PTM);
+
+ // Collect error messages.
+ SmallVector<std::string, 4> Msgs;
+
+ // Check for missing flags in the output.
+ // Permit extra flags for now at least.
+ if (PatInfo.hasSideEffects && !NumSideEffects)
+ Msgs.push_back("pattern has side effects, but hasSideEffects isn't set");
+
+ // Don't verify store flags on instructions with side effects. At least for
+ // intrinsics, side effects imply mayStore.
+ if (!PatInfo.hasSideEffects && PatInfo.mayStore && !NumStores)
+ Msgs.push_back("pattern may store, but mayStore isn't set");
+
+ // Similarly, mayStore implies mayLoad on intrinsics.
+ if (!PatInfo.mayStore && PatInfo.mayLoad && !NumLoads)
+ Msgs.push_back("pattern may load, but mayLoad isn't set");
+
+ // Print error messages.
+ if (Msgs.empty())
+ continue;
+ ++Errors;
+
+ for (unsigned i = 0, e = Msgs.size(); i != e; ++i)
+ PrintError(PTM.getSrcRecord()->getLoc(), Twine(Msgs[i]) + " on the " +
+ (Instrs.size() == 1 ?
+ "instruction" : "output instructions"));
+ // Provide the location of the relevant instruction definitions.
+ for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
+ if (Instrs[i] != PTM.getSrcRecord())
+ PrintError(Instrs[i]->getLoc(), "defined here");
+ const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
+ if (InstInfo.InferredFrom &&
+ InstInfo.InferredFrom != InstInfo.TheDef &&
+ InstInfo.InferredFrom != PTM.getSrcRecord())
+ PrintError(InstInfo.InferredFrom->getLoc(), "inferred from pattern");
+ }
+ }
+ if (Errors)
+ throw "Errors in DAG patterns";
+}
 
/// Given a pattern result with an unresolved type, see if we can find one
@@ -3330,4 +3455,3 @@ void CodeGenDAGPatterns::GenerateVariants() {
 DEBUG(errs() << "\n");
 }
}
-
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 5a2d40aa7c..25a0e4bb10 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -582,8 +582,8 @@ private:
 void ComputeNamedNodes(TreePatternNode *N);
};
 
-/// DAGDefaultOperand - One of these is created for each PredicateOperand
-/// or OptionalDefOperand that has a set ExecuteAlways / DefaultOps field.
+/// DAGDefaultOperand - One of these is created for each OperandWithDefaultOps
+/// that has a set ExecuteAlways / DefaultOps field.
struct DAGDefaultOperand { std::vector<TreePatternNode*> DefaultOps; }; @@ -797,6 +797,7 @@ private: void ParsePatterns(); void InferInstructionFlags(); void GenerateVariants(); + void VerifyInstructionFlags(); void AddPatternToMatch(const TreePattern *Pattern, const PatternToMatch &PTM); void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 12e153a665..38e2b832f2 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -287,7 +287,8 @@ void CGIOperandList::ProcessDisableEncoding(std::string DisableEncoding) { // CodeGenInstruction Implementation //===----------------------------------------------------------------------===// -CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { +CodeGenInstruction::CodeGenInstruction(Record *R) + : TheDef(R), Operands(R), InferredFrom(0) { Namespace = R->getValueAsString("Namespace"); AsmString = R->getValueAsString("AsmString"); @@ -301,8 +302,6 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { isBarrier = R->getValueAsBit("isBarrier"); isCall = R->getValueAsBit("isCall"); canFoldAsLoad = R->getValueAsBit("canFoldAsLoad"); - mayLoad = R->getValueAsBit("mayLoad"); - mayStore = R->getValueAsBit("mayStore"); isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable"); isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress"); isCommutable = R->getValueAsBit("isCommutable"); @@ -313,8 +312,13 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { hasPostISelHook = R->getValueAsBit("hasPostISelHook"); hasCtrlDep = R->getValueAsBit("hasCtrlDep"); isNotDuplicable = R->getValueAsBit("isNotDuplicable"); - hasSideEffects = R->getValueAsBit("hasSideEffects"); + + mayLoad = R->getValueAsBitOrUnset("mayLoad", mayLoad_Unset); + mayStore = R->getValueAsBitOrUnset("mayStore", mayStore_Unset); + hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", + hasSideEffects_Unset); neverHasSideEffects = R->getValueAsBit("neverHasSideEffects"); + isAsCheapAsAMove = R->getValueAsBit("isAsCheapAsAMove"); hasExtraSrcRegAllocReq = R->getValueAsBit("hasExtraSrcRegAllocReq"); hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq"); @@ -409,7 +413,7 @@ FlattenAsmStringVariants(StringRef Cur, unsigned Variant) { /// successful match, with ResOp set to the result operand to be used. 
bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, Record *InstOpRec, bool hasSubOps, - SMLoc Loc, CodeGenTarget &T, + ArrayRef<SMLoc> Loc, CodeGenTarget &T, ResultOperand &ResOp) { Init *Arg = Result->getArg(AliasOpNo); DefInit *ADI = dynamic_cast<DefInit*>(Arg); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index 95b572d2d0..f601a8318f 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -226,7 +226,10 @@ namespace llvm { bool isBarrier; bool isCall; bool canFoldAsLoad; - bool mayLoad, mayStore; + bool mayLoad; + bool mayLoad_Unset; + bool mayStore; + bool mayStore_Unset; bool isPredicable; bool isConvertibleToThreeAddress; bool isCommutable; @@ -238,6 +241,7 @@ namespace llvm { bool hasCtrlDep; bool isNotDuplicable; bool hasSideEffects; + bool hasSideEffects_Unset; bool neverHasSideEffects; bool isAsCheapAsAMove; bool hasExtraSrcRegAllocReq; @@ -245,6 +249,14 @@ namespace llvm { bool isCodeGenOnly; bool isPseudo; + /// Are there any undefined flags? + bool hasUndefFlags() const { + return mayLoad_Unset || mayStore_Unset || hasSideEffects_Unset; + } + + // The record used to infer instruction flags, or NULL if no flag values + // have been inferred. + Record *InferredFrom; CodeGenInstruction(Record *R); @@ -319,7 +331,7 @@ namespace llvm { CodeGenInstAlias(Record *R, CodeGenTarget &T); bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, - Record *InstOpRec, bool hasSubOps, SMLoc Loc, + Record *InstOpRec, bool hasSubOps, ArrayRef<SMLoc> Loc, CodeGenTarget &T, ResultOperand &ResOp); }; } diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 011f4b7938..b2e9e38bc1 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -298,7 +298,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { for (SubRegMap::const_iterator SI = SubRegs.begin(), SE = SubRegs.end(); SI != SE; ++SI) { if (SI->second == this) { - SMLoc Loc; + ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); throw TGError(Loc, "Register " + getName() + @@ -310,7 +310,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { if (Ins->second == SI->first) continue; // Trouble: Two different names for SI->second. - SMLoc Loc; + ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); throw TGError(Loc, "Sub-register can't have two names: " + diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 84c3f19de1..d5c615a1de 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -300,6 +300,8 @@ void CodeGenTarget::ComputeInstrsByEnum() const { "REG_SEQUENCE", "COPY", "BUNDLE", + "LIFETIME_START", + "LIFETIME_END", // @LOCALMOD-BEGIN "BUNDLE_ALIGN_START", "BUNDLE_ALIGN_END", @@ -340,6 +342,15 @@ bool CodeGenTarget::isLittleEndianEncoding() const { return getInstructionSet()->getValueAsBit("isLittleEndianEncoding"); } +/// guessInstructionProperties - Return true if it's OK to guess instruction +/// properties instead of raising an error. +/// +/// This is configurable as a temporary migration aid. It will eventually be +/// permanently false. 
+bool CodeGenTarget::guessInstructionProperties() const {
+ return getInstructionSet()->getValueAsBit("guessInstructionProperties");
+}
+
//===----------------------------------------------------------------------===//
// ComplexPattern implementation
//
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 2f8cee4588..672b1406a5 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -177,6 +177,10 @@ public:
 ///
 bool isLittleEndianEncoding() const;
 
+ /// guessInstructionProperties - should we just guess unset instruction
+ /// properties?
+ bool guessInstructionProperties() const;
+
private:
 void ComputeInstrsByEnum() const;
};
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index aed222c094..b291269933 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -727,8 +727,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
 
 // Determine what to emit for this operand.
 Record *OperandNode = II.Operands[InstOpNo].Rec;
- if ((OperandNode->isSubClassOf("PredicateOperand") ||
- OperandNode->isSubClassOf("OptionalDefOperand")) &&
+ if (OperandNode->isSubClassOf("OperandWithDefaultOps") &&
 !CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
 // This is a predicate or optional def operand; emit the
 // 'default ops' operands.
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index e89c393b6a..aa6d7962a0 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1783,7 +1783,7 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
 VarInit *Var = 0;
 VarBitInit *BI = dynamic_cast<VarBitInit*>(Bits.getBit(bi));
 if (BI)
- Var = dynamic_cast<VarInit*>(BI->getVariable());
+ Var = dynamic_cast<VarInit*>(BI->getBitVar());
 else
 Var = dynamic_cast<VarInit*>(Bits.getBit(bi));
 
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index 8d9d419544..8e28180ae8 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -156,7 +156,7 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
 
 // If there are more operands that weren't in the DAG, they have to
 // be operands that have default values, or we have an error. Currently,
- // PredicateOperand and OptionalDefOperand both have default values.
+ // operands that are a subclass of OperandWithDefaultOps have default values.
 
 // Validate that each result pattern argument has a matching (by name)
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
index d8ab2eeb25..60202b5ade 100644
--- a/utils/TableGen/SequenceToOffsetTable.h
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -29,8 +29,8 @@ namespace llvm {
/// Compute the layout of a table that contains all the sequences, possibly by
/// reusing entries.
///
-/// @param SeqT The sequence container. (vector or string).
-/// @param Less A stable comparator for SeqT elements.
+/// @tparam SeqT The sequence container. (vector or string).
+/// @tparam Less A stable comparator for SeqT elements.
template<typename SeqT, typename Less = std::less<typename SeqT::value_type> > class SequenceToOffsetTable { typedef typename SeqT::value_type ElemT; diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 3472343959..5dfd716d89 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -626,7 +626,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) { // Emit as { "cpu", procinit }, OS << " { " << "\"" << Name << "\", " - << "(void *)&" << ProcModelName; + << "(const void *)&" << ProcModelName; OS << " }"; diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 7ac2336d73..4b12279cdd 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -1145,6 +1145,8 @@ OperandEncoding RecognizableInstr::immediateEncodingFromString // register IDs in 8-bit immediates nowadays. ENCODING("VR256", ENCODING_IB) ENCODING("VR128", ENCODING_IB) + ENCODING("FR32", ENCODING_IB) + ENCODING("FR64", ENCODING_IB) errs() << "Unhandled immediate encoding " << s << "\n"; llvm_unreachable("Unhandled immediate encoding"); } diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index 542e510c60..e0bb2e24e2 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -143,7 +143,7 @@ private: /// @param hasREX_WPrefix - Indicates whether the instruction has a REX.W /// prefix. If it does, 32-bit register operands stay /// 32-bit regardless of the operand size. - /// @param hasOpSizePrefix- Indicates whether the instruction has an OpSize + /// @param hasOpSizePrefix Indicates whether the instruction has an OpSize /// prefix. If it does not, then 16-bit register /// operands stay 16-bit. /// @return - The operand's type. 
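The thread running through the CodeGenInstruction changes above is the move from plain booleans to tri-state flags: getValueAsBitOrUnset reports the bit's value and, separately, whether the .td record left it unset, which is what lets the inference pass tell "explicitly 0" apart from "never specified". Below is a minimal standalone C++ sketch of that tri-state merge logic; the struct and function names are hypothetical stand-ins, and the conflict exceptions for hasSideEffects and mayLoad seen in InferFromPattern are omitted.

    #include <cstdio>

    // Tri-state flag: a value plus whether the record actually set it.
    // Hypothetical stand-in for the mayLoad/mayLoad_Unset pairs added above.
    struct TriStateFlag {
      bool Value = false;
      bool Unset = true;
    };

    // Merge a pattern-inferred bit into an instruction flag. Returns true on
    // a conflict: the record set the flag explicitly, and the pattern
    // disagrees with it.
    static bool mergeInferred(TriStateFlag &F, bool Inferred) {
      if (!F.Unset && F.Value != Inferred)
        return true;
      F.Value |= Inferred;  // transfer the inferred flag
      F.Unset = false;      // the flag is now pinned down
      return false;
    }

    int main() {
      TriStateFlag MayLoad;                    // left unset in the record
      bool C1 = mergeInferred(MayLoad, true);  // pattern loads: inferred, OK
      std::printf("mayLoad=%d conflict=%d\n", MayLoad.Value, C1);

      TriStateFlag MayStore;
      MayStore.Value = false;
      MayStore.Unset = false;                  // explicitly mayStore = 0
      bool C2 = mergeInferred(MayStore, true); // pattern stores: conflict
      std::printf("mayStore=%d conflict=%d\n", MayStore.Value, C2);
      return 0;
    }

The distinction is also why the patch splits the work in two: InferInstructionFlags fills in flags that were left unset, while VerifyInstructionFlags only cross-checks explicitly set flags against the patterns.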
diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in
index 879d18bdc8..768dc5103c 100755
--- a/utils/llvm-lit/llvm-lit.in
+++ b/utils/llvm-lit/llvm-lit.in
@@ -18,10 +18,15 @@ builtin_parameters = {
 'llvm_site_config' : os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
 }
 
-clang_site_config = os.path.join(llvm_obj_root, 'tools', 'clang', 'test',
- 'lit.site.cfg')
-if os.path.exists(clang_site_config):
- builtin_parameters['clang_site_config'] = clang_site_config
+clang_obj_root = os.path.join(llvm_obj_root, 'tools', 'clang')
+
+if os.path.exists(clang_obj_root):
+ builtin_parameters['clang_site_config'] = \
+ os.path.join(clang_obj_root, 'test', 'lit.site.cfg')
+ clang_tools_extra_obj_root = os.path.join(clang_obj_root, 'tools', 'extra')
+ if os.path.exists(clang_tools_extra_obj_root):
+ builtin_parameters['clang_tools_extra_site_config'] = \
+ os.path.join(clang_tools_extra_obj_root, 'test', 'lit.site.cfg')
 
if __name__=='__main__':
 import lit
diff --git a/utils/llvm.grm b/utils/llvm.grm
index ad2799f2c5..322036b2c2 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -175,7 +175,6 @@ FuncAttr ::= noreturn
 | returns_twice
 | nonlazybind
 | address_safety
- | ia_nsdialect
 ;
 
OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ;
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-port.h b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
index 8ef5d7dd26..58f6cafa75 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-port.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-port.h
@@ -230,7 +230,7 @@
# define GTEST_OS_MAC 1
#elif defined __linux__
# define GTEST_OS_LINUX 1
-# ifdef ANDROID
+# if defined(ANDROID) || defined(__ANDROID__)
# define GTEST_OS_LINUX_ANDROID 1
# endif // ANDROID
#elif defined __MVS__
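The gtest-port.h hunk above is needed because newer Android toolchains predefine __ANDROID__ rather than the older ANDROID macro, so the previous #ifdef ANDROID silently missed them. A self-contained sketch of the same detection order follows; it is illustrative only, since the real gtest header defines GTEST_OS_* macros instead of printing.

    #include <cstdio>

    int main() {
    #if defined(__linux__)
    # if defined(ANDROID) || defined(__ANDROID__)
      // Either spelling of the macro marks an Android build.
      std::printf("Linux (Android)\n");
    # else
      std::printf("Linux\n");
    # endif
    #else
      std::printf("not Linux\n");
    #endif
      return 0;
    }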