author     Derek Schuff <dschuff@chromium.org>  2012-11-06 10:23:47 -0800
committer  Derek Schuff <dschuff@chromium.org>  2012-11-06 10:23:47 -0800
commit     5bcab54cfde18b4b11f163d7d916711df70cbebf
tree       c5774bfc00faa412178497d9ae92dea73d717a7c
parent     96cb06677afe87ea958bf986ca2b9fb87daa2da1
parent     cfe09ed28d8a65b671e8b7a716a933e98e810e32
Merge commit 'cfe09ed28d8a65b671e8b7a716a933e98e810e32'
Conflicts:
lib/Target/ARM/ARMFrameLowering.cpp
lib/Target/Mips/MipsRegisterInfo.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Transforms/IPO/ExtractGV.cpp
tools/Makefile
tools/gold/gold-plugin.cpp
The only interesting conflict was in X86ISelLowering.cpp, which meant
I had to essentially revert r167104. The problem is that this tree
uses ESP as the stack pointer in X86ISelLowering but RSP as the
stack pointer in X86FrameLowering, while that revision made both
consistently use X86RegisterInfo to determine which register to use.
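
To make the mismatch concrete, here is a minimal, self-contained C++
sketch (hypothetical names and logic, not the actual LLVM classes):
two passes that each hard-code their own stack register can disagree
on a 64-bit target that deliberately keeps a 32-bit stack pointer,
whereas routing both through a single X86RegisterInfo-style query
makes them agree by construction, which is what r167104 assumed.

    #include <cstdio>

    enum Reg { ESP, RSP };

    // Stand-in for X86RegisterInfo: the one authority on which stack
    // pointer register the subtarget uses (hypothetical, simplified).
    struct RegisterInfoSketch {
      bool Is64Bit;
      bool Use32BitStackPtr; // e.g. a sandboxed 64-bit target keeping ESP
      Reg getStackRegister() const {
        return (Is64Bit && !Use32BitStackPtr) ? RSP : ESP;
      }
    };

    // The split situation described above: each pass answers the same
    // question independently and can diverge.
    Reg iselStackReg()              { return ESP; }                 // as in X86ISelLowering here
    Reg frameStackReg(bool Is64Bit) { return Is64Bit ? RSP : ESP; } // as in X86FrameLowering here

    int main() {
      RegisterInfoSketch RI = {/*Is64Bit=*/true, /*Use32BitStackPtr=*/true};
      // Hard-coded separately, the two passes disagree on this target;
      // the unified query gives one consistent answer (ESP here).
      std::printf("isel=%d frame=%d unified=%d\n",
                  iselStackReg(), frameStackReg(true), RI.getStackRegister());
      return 0;
    }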
481 files changed, 12411 insertions(+), 7075 deletions(-)
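
For packagers, the practical effect of the configure changes below is
that the 64-bit MIPS backends can now be selected explicitly; assuming
an otherwise standard autoconf build, an invocation such as
../llvm/configure --enable-targets=mips64el (hypothetical example) now
maps to the Mips target.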
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 414e5bf5f1..13134589af 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -368,8 +368,8 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   sparc*-*) llvm_cv_target_arch="Sparc" ;;
   powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
   arm*-*) llvm_cv_target_arch="ARM" ;;
-  mips-*) llvm_cv_target_arch="Mips" ;;
-  mipsel-*) llvm_cv_target_arch="Mips" ;;
+  mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
+  mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
   xcore-*) llvm_cv_target_arch="XCore" ;;
   msp430-*) llvm_cv_target_arch="MSP430" ;;
   hexagon-*) llvm_cv_target_arch="Hexagon" ;;
@@ -401,8 +401,8 @@ case $host in
   sparc*-*) host_arch="Sparc" ;;
   powerpc*-*) host_arch="PowerPC" ;;
   arm*-*) host_arch="ARM" ;;
-  mips-*) host_arch="Mips" ;;
-  mipsel-*) host_arch="Mips" ;;
+  mips-* | mips64-*) host_arch="Mips" ;;
+  mipsel-* | mips64el-*) host_arch="Mips" ;;
   xcore-*) host_arch="XCore" ;;
   msp430-*) host_arch="MSP430" ;;
   hexagon-*) host_arch="Hexagon" ;;
@@ -719,6 +719,8 @@ case "$enableval" in
   arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
   mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
   mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+  mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+  mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
   spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
   xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
   msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@@ -3908,8 +3908,8 @@ else
   sparc*-*) llvm_cv_target_arch="Sparc" ;;
   powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
   arm*-*) llvm_cv_target_arch="ARM" ;;
-  mips-*) llvm_cv_target_arch="Mips" ;;
-  mipsel-*) llvm_cv_target_arch="Mips" ;;
+  mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
+  mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
   xcore-*) llvm_cv_target_arch="XCore" ;;
   msp430-*) llvm_cv_target_arch="MSP430" ;;
   hexagon-*) llvm_cv_target_arch="Hexagon" ;;
@@ -3941,8 +3941,8 @@ case $host in
   sparc*-*) host_arch="Sparc" ;;
   powerpc*-*) host_arch="PowerPC" ;;
   arm*-*) host_arch="ARM" ;;
-  mips-*) host_arch="Mips" ;;
-  mipsel-*) host_arch="Mips" ;;
+  mips-* | mips64-*) host_arch="Mips" ;;
+  mipsel-* | mips64el-*) host_arch="Mips" ;;
   xcore-*) host_arch="XCore" ;;
   msp430-*) host_arch="MSP430" ;;
   hexagon-*) host_arch="Hexagon" ;;
@@ -5433,6 +5433,8 @@ case "$enableval" in
   arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
   mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
   mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+  mips64) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+  mips64el) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
   spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
   xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
   msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
@@ -10318,7 +10320,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10316 "configure"
+#line 10318 "configure"
#include "confdefs.h"

#if HAVE_DLFCN_H
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index f387e7f4c5..5fab76ec1a 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -224,7 +224,7 @@ The ``DataLayout`` class
 ------------------------
 
 The ``DataLayout`` class is the only required target description class, and it
-is the only class that is not extensible (you cannot derived a new class from
+is the only class that is not extensible (you cannot derive a new class from
 it).
 
 ``DataLayout`` specifies information about how the target lays out memory for
 structures, the alignment requirements for various data types, the size of
 pointers in the target, and whether the target is little-endian or
@@ -248,7 +248,7 @@ operations.  Among other things, this class indicates:
 * the type to use for shift amounts, and
 
 * various high-level characteristics, like whether it is profitable to turn
-  division by a constant into a multiplication sequence
+  division by a constant into a multiplication sequence.
 
 The ``TargetRegisterInfo`` class
 --------------------------------
@@ -256,10 +256,10 @@ The ``TargetRegisterInfo`` class
 The ``TargetRegisterInfo`` class is used to describe the register file of the
 target and any interactions between the registers.
 
-Registers in the code generator are represented in the code generator by
-unsigned integers.  Physical registers (those that actually exist in the target
-description) are unique small numbers, and virtual registers are generally
-large.  Note that register ``#0`` is reserved as a flag value.
+Registers are represented in the code generator by unsigned integers.  Physical
+registers (those that actually exist in the target description) are unique
+small numbers, and virtual registers are generally large.  Note that
+register ``#0`` is reserved as a flag value.
 
 Each register in the processor description has an associated
 ``TargetRegisterDesc`` entry, which provides a textual name for the register
@@ -838,8 +838,7 @@ Initial SelectionDAG Construction
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The initial SelectionDAG is na\ :raw-html:`ï`\ vely peephole expanded from
-the LLVM input by the ``SelectionDAGLowering`` class in the
-``lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp`` file.  The intent of this pass
+the LLVM input by the ``SelectionDAGBuilder`` class.  The intent of this pass
 is to expose as much low-level, target-specific details to the SelectionDAG as
 possible.  This pass is mostly hard-coded (e.g. an LLVM ``add`` turns into an
 ``SDNode add`` while a ``getelementptr`` is expanded into the obvious
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index 418e3f05a3..90835307b1 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -862,23 +862,28 @@ Here are more examples:
 
 You get the idea.
 
-Please be aware that, when adding assert statements, not all compilers are aware
-of the semantics of the assert.  In some places, asserts are used to indicate a
-piece of code that should not be reached.  These are typically of the form:
+In the past, asserts were used to indicate a piece of code that should not be
+reached.  These were typically of the form:
 
 .. code-block:: c++
 
-  assert(0 && "Some helpful error message");
+  assert(0 && "Invalid radix for integer literal");
 
-When used in a function that returns a value, they should be followed with a
-return statement and a comment indicating that this line is never reached.  This
-will prevent a compiler which is unable to deduce that the assert statement
-never returns from generating a warning.
+This has a few issues, the main one being that some compilers might not
+understand the assertion, or warn about a missing return in builds where
+assertions are compiled out.
+
+Today, we have something much better: ``llvm_unreachable``:
 
 .. code-block:: c++
 
-  assert(0 && "Some helpful error message");
-  return 0;
+  llvm_unreachable("Invalid radix for integer literal");
+
+When assertions are enabled, this will print the message if it's ever reached
+and then exit the program.
+When assertions are disabled (i.e. in release builds),
+``llvm_unreachable`` becomes a hint to compilers to skip generating code for
+this branch.  If the compiler does not support this, it will fall back to the
+"abort" implementation.
 
 Another issue is that values used only by assertions will produce an "unused
 value" warning when assertions are disabled.  For example, this code will
 warn:
diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst
index 6f9ac4adc0..d786a7deda 100644
--- a/docs/HowToBuildOnARM.rst
+++ b/docs/HowToBuildOnARM.rst
@@ -27,8 +27,21 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
 
 #. If you want to run ``make
    check-all`` after building LLVM/Clang, to avoid false alarms (eg, ARCMT
-   failure) please use the following configuration:
+   failure) please use at least the following configuration:
 
    .. code-block:: bash
 
-     $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs
+     $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs-vfp
+
+#. The most popular linaro/ubuntu OS's for ARM boards, eg, the
+   Pandaboard, have become hard-float platforms. The following set
+   of configuration options appears to be a good choice for this
+   platform:
+
+   .. code-block:: bash
+
+     ./configure --build=armv7l-unknown-linux-gnueabihf
+     --host=armv7l-unknown-linux-gnueabihf
+     --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9
+     --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon
+     --enable-targets=arm --disable-optimized --enable-assertions
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 874e12fa44..ed47f1f00e 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -5060,7 +5060,7 @@ IfUnequal:
 <p>The optional constant <tt>align</tt> argument specifies the alignment of the
    operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted <tt>align</tt> argument means that the operation has the preferential
+   omitted <tt>align</tt> argument means that the operation has the abi
    alignment for the target. It is the responsibility of the code emitter to
    ensure that the alignment information is correct. Overestimating the
    alignment results in undefined behavior. Underestimating the alignment may
@@ -5141,7 +5141,7 @@ IfUnequal:
 <p>The optional constant "align" argument specifies the alignment of the
    operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted "align" argument means that the operation has the preferential
+   omitted "align" argument means that the operation has the abi
    alignment for the target. It is the responsibility of the code emitter to
    ensure that the alignment information is correct. Overestimating the
    alignment results in an undefined behavior. Underestimating the alignment may
diff --git a/docs/Passes.html b/docs/Passes.html
index 85292e3741..aa9f8bc247 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -77,6 +77,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#basicaa">-basicaa</a></td><td>Basic Alias Analysis (stateless AA impl)</td></tr> <tr><td><a href="#basiccg">-basiccg</a></td><td>Basic CallGraph Construction</td></tr> <tr><td><a href="#count-aa">-count-aa</a></td><td>Count Alias Analysis Query Responses</td></tr> +<tr><td><a href="#da">-da</a></td><td>Dependence Analysis</td></tr> <tr><td><a href="#debug-aa">-debug-aa</a></td><td>AA use debugger</td></tr> <tr><td><a href="#domfrontier">-domfrontier</a></td><td>Dominance Frontier Construction</td></tr> <tr><td><a href="#domtree">-domtree</a></td><td>Dominator Tree Construction</td></tr> @@ -92,7 +93,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if ! <tr><td><a href="#intervals">-intervals</a></td><td>Interval Partition Construction</td></tr> <tr><td><a href="#iv-users">-iv-users</a></td><td>Induction Variable Users</td></tr> <tr><td><a href="#lazy-value-info">-lazy-value-info</a></td><td>Lazy Value Information Analysis</td></tr> -<tr><td><a href="#lda">-lda</a></td><td>Loop Dependence Analysis</td></tr> <tr><td><a href="#libcall-aa">-libcall-aa</a></td><td>LibCall Alias Analysis</td></tr> <tr><td><a href="#lint">-lint</a></td><td>Statically lint-checks LLVM IR</td></tr> <tr><td><a href="#loops">-loops</a></td><td>Natural Loop Information</td></tr> @@ -182,7 +182,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if ! <tr><td><a href="#strip-debug-declare">-strip-debug-declare</a></td><td>Strip all llvm.dbg.declare intrinsics</td></tr> <tr><td><a href="#strip-nondebug">-strip-nondebug</a></td><td>Strip all symbols, except dbg symbols, from a module</td></tr> <tr><td><a href="#tailcallelim">-tailcallelim</a></td><td>Tail Call Elimination</td></tr> -<tr><td><a href="#tailduplicate">-tailduplicate</a></td><td>Tail Duplication</td></tr> <tr><th colspan="2"><b>UTILITY PASSES</b></th></tr> @@ -251,6 +250,15 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if ! <!-------------------------------------------------------------------------- --> <h3> + <a name="da">-da: Dependence Analysis</a> +</h3> +<div> + <p>Dependence analysis framework, which is used to detect dependences in + memory accesses.</p> +</div> + +<!-------------------------------------------------------------------------- --> +<h3> <a name="debug-aa">-debug-aa: AA use debugger</a> </h3> <div> @@ -433,15 +441,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if ! <!-------------------------------------------------------------------------- --> <h3> - <a name="lda">-lda: Loop Dependence Analysis</a> -</h3> -<div> - <p>Loop dependence analysis framework, which is used to detect dependences in - memory accesses in loops.</p> -</div> - -<!-------------------------------------------------------------------------- --> -<h3> <a name="libcall-aa">-libcall-aa: LibCall Alias Analysis</a> </h3> <div> @@ -1862,22 +1861,6 @@ if (X < 3) {</pre> </ul> </div> -<!-------------------------------------------------------------------------- --> -<h3> - <a name="tailduplicate">-tailduplicate: Tail Duplication</a> -</h3> -<div> - <p> - This pass performs a limited form of tail duplication, intended to simplify - CFGs by removing some unconditional branches. This pass is necessary to - straighten out loops created by the C front-end, but also is capable of - making other code nicer. After this pass is run, the CFG simplify pass - should be run to clean up the mess. 
-  </p>
-</div>
-
-</div>
-
 <!-- ======================================================================= -->
 <h2><a name="utilities">Utility Passes</a></h2>
 <div>
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index cd984b09be..13ef9eddd3 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -12,10 +12,16 @@ you can now submit your patches for Clang and LLVM at
 Sign up
 -------
 
-Sign up with one of the supported OAuth account types. If
-you use your Subversion user name as Phabricator user name,
-Phabricator will automatically connect your submits to your
-Phabricator user in the `Code Repository Browser`_.
+There are two options to get an account on Phabricator. You can sign up
+immediately with one of the supported OAuth account types if you're comfortable
+with OAuth, but you can also email chandlerc@gmail.com to request an account to
+be created manually without using OAuth. We're working to get support in
+Phabricator to directly create new accounts, but currently this is a manual
+process.
+
+Note that if you use your Subversion user name as Phabricator user name,
+Phabricator will automatically connect your submits to your Phabricator user in
+the `Code Repository Browser`_.
 
 Requesting a review via the command line
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 9a1b547b4a..45a9cc5dec 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -468,9 +468,10 @@ Release Notes</a>.</h1>
 <p> Loop Vectorizer - We've added a loop vectorizer and we are now able
   to vectorize small loops. The loop vectorizer is disabled by default and
-  can be enabled using the <b>-mllvm -vectorize</b> flag.
+  can be enabled using the <b>-mllvm -vectorize-loops</b> flag.
   The SIMD vector width can be specified using the flag
   <b>-mllvm -force-vector-width=4</b>.
+  The default value is <b>0</b> which means auto-select.
   <br/>
   We can now vectorize this code:
@@ -481,9 +482,13 @@ Release Notes</a>.</h1>
   }
   </pre>
 
-  </p>
+</p>
+
+<p>SROA - We've re-written SROA to be significantly more powerful.
+<!-- FIXME: Add more text here... --></p>
 
 <ul>
+  <li>Branch weight metadata is preseved through more of the optimizer.</li>
   <li>...</li>
 </ul>
@@ -669,6 +674,9 @@ Release Notes</a>.</h1>
   "TargetTransformInfo" provides a number of low-level interfaces.
   LSR and LowerInvoke already use the new interface. </p>
 
+<p> The TargetData structure has been renamed to DataLayout and moved to VMCore
+to remove a dependency on Target. </p>
+
 <ul>
   <li>...</li>
 </ul>
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index 5a625a4c83..31c6e6adbf 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -455,14 +455,11 @@ namespace llvm {
     /* The sign bit of this number. */
     unsigned int sign: 1;
-
-    /* For PPCDoubleDouble, we have a second exponent and sign (the second
-       significand is appended to the first one, although it would be wrong to
-       regard these as a single number for arithmetic purposes). These fields
-       are not meaningful for any other type. */
-    exponent_t exponent2 : 11;
-    unsigned int sign2: 1;
   };
+
+  // See friend declaration above. This additional declaration is required in
+  // order to compile LLVM with IBM xlC compiler.
+  hash_code hash_value(const APFloat &Arg);
 } /* namespace llvm */
 
 #endif /* LLVM_FLOAT_H */
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 4470534e04..c7c8016b83 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -760,7 +760,7 @@ public:
   APInt shl(unsigned shiftAmt) const {
     assert(shiftAmt <= BitWidth && "Invalid shift amount");
     if (isSingleWord()) {
-      if (shiftAmt == BitWidth)
+      if (shiftAmt >= BitWidth)
         return APInt(BitWidth, 0); // avoid undefined shift results
       return APInt(BitWidth, VAL << shiftAmt);
     }
@@ -1780,6 +1780,9 @@ inline APInt Not(const APInt& APIVal) {
 
 } // End of APIntOps namespace
 
+  // See friend declaration above. This additional declaration is required in
+  // order to compile LLVM with IBM xlC compiler.
+  hash_code hash_value(const APInt &Arg);
 } // End of llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 15fe55fbe3..2ace8294a8 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -426,11 +426,6 @@ public:
   /// architecture name (e.g., "x86").
   static ArchType getArchTypeForLLVMName(StringRef Str);
 
-  /// getArchTypeForDarwinArchName - Get the architecture type for a "Darwin"
-  /// architecture name, for example as accepted by "gcc -arch" (see also
-  /// arch(3)).
-  static ArchType getArchTypeForDarwinArchName(StringRef Str);
-
   /// @}
 };
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 3818428a5e..b4327eeb0b 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -49,7 +49,7 @@ namespace llvm {
   /// determine anything beyond the existence of a dependence; that is, it
   /// represents a confused dependence (see also FullDependence). In most
   /// cases (for output, flow, and anti dependences), the dependence implies
-  /// an ordering, where the source must preceed the destination; in contrast,
+  /// an ordering, where the source must precede the destination; in contrast,
   /// input dependences are unordered.
   class Dependence {
   public:
@@ -126,7 +126,7 @@ namespace llvm {
     virtual bool isConsistent() const { return false; }
 
     /// getLevels - Returns the number of common loops surrounding the
-    /// souce and destination of the dependence.
+    /// source and destination of the dependence.
     virtual unsigned getLevels() const { return 0; }
 
     /// getDirection - Returns the direction associated with a particular
@@ -169,7 +169,7 @@ namespace llvm {
   /// able to accurately analyze the interaction of the references; that is,
   /// it is not a confused dependence (see Dependence). In most cases
   /// (for output, flow, and anti dependences), the dependence implies an
-  /// ordering, where the source must preceed the destination; in contrast,
+  /// ordering, where the source must precede the destination; in contrast,
   /// input dependences are unordered.
   class FullDependence : public Dependence {
   public:
@@ -195,7 +195,7 @@ namespace llvm {
     bool isConsistent() const { return Consistent; }
 
     /// getLevels - Returns the number of common loops surrounding the
-    /// souce and destination of the dependence.
+    /// source and destination of the dependence.
     unsigned getLevels() const { return Levels; }
 
     /// getDirection - Returns the direction associated with a particular
@@ -505,7 +505,7 @@ namespace llvm {
     /// isKnownPredicate - Compare X and Y using the predicate Pred.
     /// Basically a wrapper for SCEV::isKnownPredicate,
-    /// but tries harder, especially in the presense of sign and zero
+    /// but tries harder, especially in the presence of sign and zero
     /// extensions and symbolics.
     bool isKnownPredicate(ICmpInst::Predicate Pred,
                           const SCEV *X,
@@ -673,7 +673,7 @@ namespace llvm {
     /// where i and j are induction variable, c1 and c2 are loop invariant,
     /// and a and b are constants.
     /// Returns true if any possible dependence is disproved.
-    /// Marks the result as inconsistant.
+    /// Marks the result as inconsistent.
     /// Works in some cases that symbolicRDIVtest doesn't,
     /// and vice versa.
     bool exactRDIVtest(const SCEV *SrcCoeff,
@@ -689,7 +689,7 @@ namespace llvm {
     /// where i and j are induction variable, c1 and c2 are loop invariant,
     /// and a and b are constants.
     /// Returns true if any possible dependence is disproved.
-    /// Marks the result as inconsistant.
+    /// Marks the result as inconsistent.
     /// Works in some cases that exactRDIVtest doesn't,
     /// and vice versa. Can also be used as a backup for
     /// ordinary SIV tests.
@@ -702,7 +702,7 @@ namespace llvm {
 
     /// gcdMIVtest - Tests an MIV subscript pair for dependence.
     /// Returns true if any possible dependence is disproved.
-    /// Marks the result as inconsistant.
+    /// Marks the result as inconsistent.
     /// Can sometimes disprove the equal direction for 1 or more loops.
     // Can handle some symbolics that even the SIV tests don't get,
     /// so we use it as a backup for everything.
@@ -712,7 +712,7 @@ namespace llvm {
 
     /// banerjeeMIVtest - Tests an MIV subscript pair for dependence.
     /// Returns true if any possible dependence is disproved.
-    /// Marks the result as inconsistant.
+    /// Marks the result as inconsistent.
     /// Computes directions.
     bool banerjeeMIVtest(const SCEV *Src,
                          const SCEV *Dst,
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
deleted file mode 100644
index f195d27824..0000000000
--- a/include/llvm/Analysis/LoopDependenceAnalysis.h
+++ /dev/null
@@ -1,124 +0,0 @@
-//===- llvm/Analysis/LoopDependenceAnalysis.h --------------- -*- C++ -*---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// LoopDependenceAnalysis is an LLVM pass that analyses dependences in memory
-// accesses in loops.
-//
-// Please note that this is work in progress and the interface is subject to
-// change.
-//
-// TODO: adapt as interface progresses
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
-#define LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/Allocator.h"
-
-namespace llvm {
-
-class AliasAnalysis;
-class AnalysisUsage;
-class ScalarEvolution;
-class SCEV;
-class Value;
-class raw_ostream;
-
-class LoopDependenceAnalysis : public LoopPass {
-  AliasAnalysis *AA;
-  ScalarEvolution *SE;
-
-  /// L - The loop we are currently analysing.
-  Loop *L;
-
-  /// TODO: doc
-  enum DependenceResult { Independent = 0, Dependent = 1, Unknown = 2 };
-
-  /// TODO: doc
-  struct Subscript {
-    /// TODO: Add distance, direction, breaking conditions, ...
-  };
-
-  /// DependencePair - Represents a data dependence relation between to memory
-  /// reference instructions.
-  struct DependencePair : public FastFoldingSetNode {
-    Value *A;
-    Value *B;
-    DependenceResult Result;
-    SmallVector<Subscript, 4> Subscripts;
-
-    DependencePair(const FoldingSetNodeID &ID, Value *a, Value *b) :
-        FastFoldingSetNode(ID), A(a), B(b), Result(Unknown), Subscripts() {}
-  };
-
-  /// findOrInsertDependencePair - Return true if a DependencePair for the
-  /// given Values already exists, false if a new DependencePair had to be
-  /// created. The third argument is set to the pair found or created.
-  bool findOrInsertDependencePair(Value*, Value*, DependencePair*&);
-
-  /// getLoops - Collect all loops of the loop nest L in which
-  /// a given SCEV is variant.
-  void getLoops(const SCEV*, DenseSet<const Loop*>*) const;
-
-  /// isLoopInvariant - True if a given SCEV is invariant in all loops of the
-  /// loop nest starting at the innermost loop L.
-  bool isLoopInvariant(const SCEV*) const;
-
-  /// isAffine - An SCEV is affine with respect to the loop nest starting at
-  /// the innermost loop L if it is of the form A+B*X where A, B are invariant
-  /// in the loop nest and X is a induction variable in the loop nest.
-  bool isAffine(const SCEV*) const;
-
-  /// TODO: doc
-  bool isZIVPair(const SCEV*, const SCEV*) const;
-  bool isSIVPair(const SCEV*, const SCEV*) const;
-  DependenceResult analyseZIV(const SCEV*, const SCEV*, Subscript*) const;
-  DependenceResult analyseSIV(const SCEV*, const SCEV*, Subscript*) const;
-  DependenceResult analyseMIV(const SCEV*, const SCEV*, Subscript*) const;
-  DependenceResult analyseSubscript(const SCEV*, const SCEV*, Subscript*) const;
-  DependenceResult analysePair(DependencePair*) const;
-
-public:
-  static char ID; // Class identification, replacement for typeinfo
-  LoopDependenceAnalysis() : LoopPass(ID) {
-    initializeLoopDependenceAnalysisPass(*PassRegistry::getPassRegistry());
-  }
-
-  /// isDependencePair - Check whether two values can possibly give rise to
-  /// a data dependence: that is the case if both are instructions accessing
-  /// memory and at least one of those accesses is a write.
-  bool isDependencePair(const Value*, const Value*) const;
-
-  /// depends - Return a boolean indicating if there is a data dependence
-  /// between two instructions.
-  bool depends(Value*, Value*);
-
-  bool runOnLoop(Loop*, LPPassManager&);
-  virtual void releaseMemory();
-  virtual void getAnalysisUsage(AnalysisUsage&) const;
-  void print(raw_ostream&, const Module* = 0) const;
-
-private:
-  FoldingSet<DependencePair> Pairs;
-  BumpPtrAllocator PairAllocator;
-}; // class LoopDependenceAnalysis
-
-// createLoopDependenceAnalysisPass - This creates an instance of the
-// LoopDependenceAnalysis pass.
-//
-LoopPass *createLoopDependenceAnalysisPass();
-
-} // namespace llvm
-
-#endif /* LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H */
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 9e5d97dd7f..a842898e41 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -168,8 +168,7 @@ class ObjectSizeOffsetVisitor
 
 public:
   ObjectSizeOffsetVisitor(const DataLayout *TD, const TargetLibraryInfo *TLI,
-                          LLVMContext &Context, bool RoundToAlign = false,
-                          unsigned AS = 0);
+                          LLVMContext &Context, bool RoundToAlign = false);
 
   SizeOffsetType compute(Value *V);
 
@@ -230,7 +229,7 @@ class ObjectSizeOffsetEvaluator
 
 public:
   ObjectSizeOffsetEvaluator(const DataLayout *TD, const TargetLibraryInfo *TLI,
-                            LLVMContext &Context, unsigned AS = 0);
+                            LLVMContext &Context);
 
   SizeOffsetEvalType compute(Value *V);
 
   bool knownSize(SizeOffsetEvalType SizeOffset) {
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index c127830e0e..27726f49bc 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -187,10 +187,10 @@ namespace llvm {
 
   //===--------------------------------------------------------------------===//
   //
-  // createLoopDependenceAnalysisPass - This creates an instance of the
-  // LoopDependenceAnalysis pass.
+  // createCostModelAnalysisPass - This creates an instance of the
+  // CostModelAnalysis pass.
   //
-  LoopPass *createLoopDependenceAnalysisPass();
+  FunctionPass *createCostModelAnalysisPass();
 
   //===--------------------------------------------------------------------===//
   //
diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h
index bec9fac770..9efbafcef4 100644
--- a/include/llvm/Analysis/ProfileDataLoader.h
+++ b/include/llvm/Analysis/ProfileDataLoader.h
@@ -115,9 +115,6 @@ public:
   /// been counted yet.
   static const unsigned Uncounted;
 
-  /// The maximum value that can be stored in a profiling counter.
-  static const unsigned MaxCount;
-
   /// getNumExecutions - Return the number of times the target program was run
   /// to generate this profiling data.
   unsigned getNumExecutions() const { return CommandLines.size(); }
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index d2df67080c..235adca021 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -628,7 +628,7 @@ namespace llvm {
 
     /// getSizeOfExpr - Return an expression for sizeof on the given type.
     ///
-    const SCEV *getSizeOfExpr(Type *AllocTy, Type *IntPtrTy);
+    const SCEV *getSizeOfExpr(Type *AllocTy);
 
     /// getAlignOfExpr - Return an expression for alignof on the given type.
     ///
@@ -636,8 +636,7 @@ namespace llvm {
 
     /// getOffsetOfExpr - Return an expression for offsetof on the given field.
     ///
-    const SCEV *getOffsetOfExpr(StructType *STy, Type *IntPtrTy,
-                                unsigned FieldNo);
+    const SCEV *getOffsetOfExpr(StructType *STy, unsigned FieldNo);
 
     /// getOffsetOfExpr - Return an expression for offsetof on the given field.
     ///
@@ -874,6 +873,7 @@ namespace llvm {
     virtual void releaseMemory();
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual void print(raw_ostream &OS, const Module* = 0) const;
+    virtual void verifyAnalysis() const;
 
   private:
     FoldingSet<SCEV> UniqueSCEVs;
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 097ee3c7aa..5ace200803 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -51,41 +51,41 @@ public:
   enum AttrVal {
     // IR-Level Attributes
-    None = 0,             ///< No attributes have been set
-    AddressSafety = 1,    ///< Address safety checking is on.
-    Alignment = 2,        ///< Alignment of parameter (5 bits)
+    None,                 ///< No attributes have been set
+    AddressSafety,        ///< Address safety checking is on.
+    Alignment,            ///< Alignment of parameter (5 bits)
                           ///< stored as log2 of alignment with +1 bias
                           ///< 0 means unaligned different from align 1
-    AlwaysInline = 3,     ///< inline=always
-    ByVal = 4,            ///< Pass structure by value
-    InlineHint = 5,       ///< Source said inlining was desirable
-    InReg = 6,            ///< Force argument to be passed in register
-    Naked = 7,            ///< Naked function
-    Nest = 8,             ///< Nested function static chain
-    NoAlias = 9,          ///< Considered to not alias after call
-    NoCapture = 10,       ///< Function creates no aliases of pointer
-    NoImplicitFloat = 11, ///< Disable implicit floating point insts
-    NoInline = 12,        ///< inline=never
-    NonLazyBind = 13,     ///< Function is called early and/or
+    AlwaysInline,         ///< inline=always
+    ByVal,                ///< Pass structure by value
+    InlineHint,           ///< Source said inlining was desirable
+    InReg,                ///< Force argument to be passed in register
+    MinSize,              ///< Function must be optimized for size first
+    Naked,                ///< Naked function
+    Nest,                 ///< Nested function static chain
+    NoAlias,              ///< Considered to not alias after call
+    NoCapture,            ///< Function creates no aliases of pointer
+    NoImplicitFloat,      ///< Disable implicit floating point insts
+    NoInline,             ///< inline=never
+    NonLazyBind,          ///< Function is called early and/or
                           ///< often, so lazy binding isn't worthwhile
-    NoRedZone = 14,       ///< Disable redzone
-    NoReturn = 15,        ///< Mark the function as not returning
-    NoUnwind = 16,        ///< Function doesn't unwind stack
-    OptimizeForSize = 17, ///< opt_size
-    ReadNone = 18,        ///< Function does not access memory
-    ReadOnly = 19,        ///< Function only reads from memory
-    ReturnsTwice = 20,    ///< Function can return twice
-    SExt = 21,            ///< Sign extended before/after call
-    StackAlignment = 22,  ///< Alignment of stack for function (3 bits)
+    NoRedZone,            ///< Disable redzone
+    NoReturn,             ///< Mark the function as not returning
+    NoUnwind,             ///< Function doesn't unwind stack
+    OptimizeForSize,      ///< opt_size
+    ReadNone,             ///< Function does not access memory
+    ReadOnly,             ///< Function only reads from memory
+    ReturnsTwice,         ///< Function can return twice
+    SExt,                 ///< Sign extended before/after call
+    StackAlignment,       ///< Alignment of stack for function (3 bits)
                           ///< stored as log2 of alignment with +1 bias 0
                           ///< means unaligned (different from
                           ///< alignstack={1))
-    StackProtect = 23,    ///< Stack protection.
-    StackProtectReq = 24, ///< Stack protection required.
-    StructRet = 25,       ///< Hidden pointer to structure to return
-    UWTable = 26,         ///< Function must be in a unwind table
-    ZExt = 27,            ///< Zero extended before/after call
-    ForceSizeOpt = 28     ///< Function must be optimized for size first
+    StackProtect,         ///< Stack protection.
+    StackProtectReq,      ///< Stack protection required.
+    StructRet,            ///< Hidden pointer to structure to return
+    UWTable,              ///< Function must be in a unwind table
+    ZExt                  ///< Zero extended before/after call
   };
 private:
   AttributesImpl *Attrs;
@@ -154,7 +154,7 @@ public:
            hasAttribute(Attributes::NonLazyBind) ||
            hasAttribute(Attributes::ReturnsTwice) ||
            hasAttribute(Attributes::AddressSafety) ||
-           hasAttribute(Attributes::ForceSizeOpt);
+           hasAttribute(Attributes::MinSize);
   }
 
   bool operator==(const Attributes &A) const {
@@ -266,7 +266,7 @@ public:
       .removeAttribute(Attributes::NonLazyBind)
      .removeAttribute(Attributes::ReturnsTwice)
      .removeAttribute(Attributes::AddressSafety)
-      .removeAttribute(Attributes::ForceSizeOpt);
+      .removeAttribute(Attributes::MinSize);
   }
 
   uint64_t Raw() const { return Bits; }
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 20e33f74f6..076f6f39fe 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -122,6 +122,11 @@ namespace llvm {
       Roots.push_back(GCRoot(Num, Metadata));
     }
 
+    /// removeStackRoot - Removes a root.
+    roots_iterator removeStackRoot(roots_iterator position) {
+      return Roots.erase(position);
+    }
+
     /// addSafePoint - Notes the existence of a safe point. Num is the ID of the
     /// label just prior to the safe point (if the code generator is using
     /// MachineModuleInfo).
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index eab74bd301..7eb03a9301 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -445,6 +445,11 @@ public:
   /// Instructions with this flag set are not necessarily simple load
   /// instructions, they may load a value and modify it, for example.
   bool mayLoad(QueryType Type = AnyInBundle) const {
+    if (isInlineAsm()) {
+      unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+      if (ExtraInfo & InlineAsm::Extra_MayLoad)
+        return true;
+    }
     return hasProperty(MCID::MayLoad, Type);
   }
 
@@ -454,6 +459,11 @@ public:
   /// instructions, they may store a modified value based on their operands, or
   /// may not actually modify anything, for example.
   bool mayStore(QueryType Type = AnyInBundle) const {
+    if (isInlineAsm()) {
+      unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+      if (ExtraInfo & InlineAsm::Extra_MayStore)
+        return true;
+    }
     return hasProperty(MCID::MayStore, Type);
   }
 
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 5a182101c1..606833cd40 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -687,6 +687,9 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
   return OS;
 }
 
+  // See friend declaration above. This additional declaration is required in
+  // order to compile LLVM with IBM xlC compiler.
+  hash_code hash_value(const MachineOperand &MO);
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 2c82875108..362e9afd22 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1199,9 +1199,6 @@ public:
   /// have to duplicate its logic everywhere it's called.
   bool isExactlyValue(double V) const {
     bool ignored;
-    // convert is not supported on this type
-    if (&Value->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble)
-      return false;
     APFloat Tmp(V);
     Tmp.convert(Value->getValueAPF().getSemantics(),
                 APFloat::rmNearestTiesToEven, &ignored);
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index ac760f911a..ca64124729 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -1,6 +1,4 @@
-/**************************************
-** Created by Kevin from config.h.in **
-***************************************/
+/* include/llvm/Config/config.h.cmake corresponding to config.h.in. */

#ifndef CONFIG_H
#define CONFIG_H
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index b56b9cad11..7f94ef464e 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -282,9 +282,6 @@ public:
   bool isExactlyValue(double V) const {
     bool ignored;
-    // convert is not supported on this type
-    if (&Val.getSemantics() == &APFloat::PPCDoubleDouble)
-      return false;
     APFloat FV(V);
     FV.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &ignored);
     return isExactlyValue(FV);
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index d778556684..24ad05f17f 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -231,7 +231,9 @@ public:
   }
 
   /// Layout pointer alignment
-  unsigned getPointerABIAlignment(unsigned AS) const {
+  /// FIXME: The defaults need to be removed once all of
+  /// the backends/clients are updated.
+  unsigned getPointerABIAlignment(unsigned AS = 0) const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -239,7 +241,9 @@ public:
     return val->second.ABIAlign;
   }
   /// Return target's alignment for stack-based pointers
-  unsigned getPointerPrefAlignment(unsigned AS) const {
+  /// FIXME: The defaults need to be removed once all of
+  /// the backends/clients are updated.
+  unsigned getPointerPrefAlignment(unsigned AS = 0) const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -247,7 +251,9 @@ public:
     return val->second.PrefAlign;
   }
   /// Layout pointer size
-  unsigned getPointerSize(unsigned AS) const {
+  /// FIXME: The defaults need to be removed once all of
+  /// the backends/clients are updated.
+  unsigned getPointerSize(unsigned AS = 0) const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -255,21 +261,11 @@ public:
     return val->second.TypeBitWidth;
   }
   /// Layout pointer size, in bits
-  unsigned getPointerSizeInBits(unsigned AS) const {
-    DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
-    if (val == Pointers.end()) {
-      val = Pointers.find(0);
-    }
-    return 8*val->second.TypeBitWidth;
+  /// FIXME: The defaults need to be removed once all of
+  /// the backends/clients are updated.
+  unsigned getPointerSizeInBits(unsigned AS = 0) const {
+    return getPointerSize(AS) * 8;
   }
-  /// Layout pointer size, in bits, based on the type.
-  /// If this function is called with a pointer type, then
-  /// the type size of the pointer is returned.
-  /// If this function is called with a vector of pointers,
-  /// then the type size of the pointer is returned.
-  /// Otherwise the type sizeof a default pointer is returned.
-  unsigned getPointerTypeSizeInBits(Type* Ty) const;
-
   /// Size examples:
   ///
   /// Type        SizeInBits  StoreSizeInBits  AllocSizeInBits[*]
@@ -345,13 +341,14 @@ public:
   ///
   unsigned getPreferredTypeAlignmentShift(Type *Ty) const;
 
-  /// getIntPtrType - Return an integer type that is the same size or
-  /// greater to the pointer size based on the address space.
-  IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace) const;
+  /// getIntPtrType - Return an integer type with size at least as big as that
+  /// of a pointer in the given address space.
+  IntegerType *getIntPtrType(LLVMContext &C, unsigned AddressSpace = 0) const;
 
-  /// getIntPtrType - Return an integer type that is the same size or
-  /// greater to the pointer size based on the Type.
-  IntegerType *getIntPtrType(Type *) const;
+  /// getIntPtrType - Return an integer (vector of integer) type with size at
+  /// least as big as that of a pointer of the given pointer (vector of pointer)
+  /// type.
+  Type *getIntPtrType(Type *) const;
 
   /// getIndexedOffset - return the offset from the beginning of the type for
   /// the specified indices.  This is used to implement getelementptr.
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 4c75b6ab97..9089646501 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -10,7 +10,9 @@
 #ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
 #define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
 
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Support/DataTypes.h"
+
 #include <string>
 
 namespace llvm {
@@ -22,7 +24,7 @@ namespace llvm {
 /// memory for the code generated by the JIT. This can be reimplemented by
 /// clients that have a strong desire to control how the layout of JIT'd memory
 /// works.
-class JITMemoryManager {
+class JITMemoryManager : public RTDyldMemoryManager {
 protected:
   bool HasGOT;
 
@@ -47,17 +49,6 @@ public:
   /// debugging, and may be turned on by default in debug mode.
   virtual void setPoisonMemory(bool poison) = 0;
 
-  /// getPointerToNamedFunction - This method returns the address of the
-  /// specified function. As such it is only useful for resolving library
-  /// symbols, not code generated symbols.
-  ///
-  /// If AbortOnFailure is false and no function with the given name is
-  /// found, this function silently returns a null pointer. Otherwise,
-  /// it prints a message to stderr and aborts.
-  ///
-  virtual void *getPointerToNamedFunction(const std::string &Name,
-                                          bool AbortOnFailure = true) = 0;
-
   //===--------------------------------------------------------------------===//
   // Global Offset Table Management
   //===--------------------------------------------------------------------===//
@@ -112,22 +103,6 @@ public:
   virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
                                uint8_t *FunctionEnd) = 0;
 
-  /// allocateCodeSection - Allocate a memory block of (at least) the given
-  /// size suitable for executable code. The SectionID is a unique identifier
-  /// assigned by the JIT and passed through to the memory manager for
-  /// the instance class to use if it needs to communicate to the JIT about
-  /// a given section after the fact.
-  virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID) = 0;
-
-  /// allocateDataSection - Allocate a memory block of (at least) the given
-  /// size suitable for data. The SectionID is a unique identifier
-  /// assigned by the JIT and passed through to the memory manager for
-  /// the instance class to use if it needs to communicate to the JIT about
-  /// a given section after the fact.
-  virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID) = 0;
-
   /// allocateSpace - Allocate a memory block of the given size. This method
   /// cannot be called between calls to startFunctionBody and endFunctionBody.
   virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index a71b1411c8..891f534862 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -24,9 +24,9 @@ class RuntimeDyldImpl;
 class ObjectImage;
 
 // RuntimeDyld clients often want to handle the memory management of
-// what gets placed where. For JIT clients, this is an abstraction layer
-// over the JITMemoryManager, which references objects by their source
-// representations in LLVM IR.
+// what gets placed where. For JIT clients, this is the subset of
+// JITMemoryManager required for dynamic loading of binaries.
+//
 // FIXME: As the RuntimeDyld fills out, additional routines will be needed
 // for the varying types of objects to be allocated.
 class RTDyldMemoryManager {
@@ -37,15 +37,26 @@ public:
   virtual ~RTDyldMemoryManager();
 
   /// allocateCodeSection - Allocate a memory block of (at least) the given
-  /// size suitable for executable code.
+  /// size suitable for executable code. The SectionID is a unique identifier
+  /// assigned by the JIT engine, and optionally recorded by the memory manager
+  /// to access a loaded section.
   virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                        unsigned SectionID) = 0;
 
   /// allocateDataSection - Allocate a memory block of (at least) the given
-  /// size suitable for data.
+  /// size suitable for data. The SectionID is a unique identifier
+  /// assigned by the JIT engine, and optionally recorded by the memory manager
+  /// to access a loaded section.
   virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                        unsigned SectionID) = 0;
 
+  /// getPointerToNamedFunction - This method returns the address of the
+  /// specified function. As such it is only useful for resolving library
+  /// symbols, not code generated symbols.
+  ///
+  /// If AbortOnFailure is false and no function with the given name is
+  /// found, this function returns a null pointer. Otherwise, it prints a
+  /// message to stderr and aborts.
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) = 0;
 };
diff --git a/include/llvm/IRBuilder.h b/include/llvm/IRBuilder.h
index 46720983e4..f63a16051e 100644
--- a/include/llvm/IRBuilder.h
+++ b/include/llvm/IRBuilder.h
@@ -17,6 +17,7 @@
 
 #include "llvm/Instructions.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/DataLayout.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
@@ -266,6 +267,10 @@ public:
     return Type::getInt8PtrTy(Context, AddrSpace);
   }
 
+  IntegerType* getIntPtrTy(DataLayout *DL, unsigned AddrSpace = 0) {
+    return DL->getIntPtrType(Context, AddrSpace);
+  }
+
   //===--------------------------------------------------------------------===//
   // Intrinsic creation methods
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 99bf8be1bf..a6b7d31817 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -88,6 +88,7 @@ void initializeCodePlacementOptPass(PassRegistry&);
 void initializeConstantMergePass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
 void initializeMachineCopyPropagationPass(PassRegistry&);
+void initializeCostModelAnalysisPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeDAEPass(PassRegistry&);
 void initializeDAHPass(PassRegistry&);
@@ -147,7 +148,6 @@ void initializeProfileMetadataLoaderPassPass(PassRegistry&);
 void initializePathProfileLoaderPassPass(PassRegistry&);
 void initializeLocalStackSlotPassPass(PassRegistry&);
 void initializeLoopDeletionPass(PassRegistry&);
-void initializeLoopDependenceAnalysisPass(PassRegistry&);
 void initializeLoopExtractorPass(PassRegistry&);
 void initializeLoopInfoPass(PassRegistry&);
 void initializeLoopInstSimplifyPass(PassRegistry&);
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index c6e0aab05e..b5e0fd4eff 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -214,6 +214,8 @@ public:
     Extra_HasSideEffects = 1,
     Extra_IsAlignStack = 2,
     Extra_AsmDialect = 4,
+    Extra_MayLoad = 8,
+    Extra_MayStore = 16,
 
     // Inline asm operands map to multiple SDNode / MachineInstr operands.
     // The first operand is an immediate describing the asm operand, the low
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index b661372f53..da17f3b80d 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -17,7 +17,6 @@
 #define LLVM_INSTRUCTION_TYPES_H
 
 #include "llvm/Instruction.h"
-#include "llvm/DataLayout.h"
 #include "llvm/OperandTraits.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/Twine.h"
@@ -577,11 +576,6 @@ public:
     Type *IntPtrTy ///< Integer type corresponding to pointer
   ) const;
 
-  /// @brief Determine if this cast is a no-op cast.
-  bool isNoopCast(
-    const DataLayout &DL ///< DataLayout to get the Int Ptr type from.
-  ) const;
-
   /// Determine how a pair of casts can be eliminated, if they can be at all.
   /// This is a helper function for both CastInst and ConstantExpr.
   /// @returns 0 if the CastInst pair can't be eliminated, otherwise
@@ -594,7 +588,9 @@ public:
     Type *SrcTy, ///< SrcTy of 1st cast
     Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast
     Type *DstTy, ///< DstTy of 2nd cast
-    Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null
+    Type *SrcIntPtrTy, ///< Integer type corresponding to Ptr SrcTy, or null
+    Type *MidIntPtrTy, ///< Integer type corresponding to Ptr MidTy, or null
+    Type *DstIntPtrTy  ///< Integer type corresponding to Ptr DstTy, or null
   );
 
   /// @brief Return the opcode of this CastInst
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 40dbbaabe6..69593b48c1 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -225,8 +225,9 @@ public:
   const Value *getPointerOperand() const { return getOperand(0); }
   static unsigned getPointerOperandIndex() { return 0U; }
 
+  /// \brief Returns the address space of the pointer operand.
   unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+    return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
@@ -347,17 +348,9 @@ public:
   const Value *getPointerOperand() const { return getOperand(1); }
   static unsigned getPointerOperandIndex() { return 1U; }
 
+  /// \brief Returns the address space of the pointer operand.
   unsigned getPointerAddressSpace() const {
-    if (getPointerOperand()->getType()->isPointerTy())
-      return cast<PointerType>(getPointerOperand()->getType())
-        ->getAddressSpace();
-    if (getPointerOperand()->getType()->isVectorTy()
-        && cast<VectorType>(getPointerOperand()->getType())->isPointerTy())
-      return cast<PointerType>(cast<VectorType>(
-            getPointerOperand()->getType())->getElementType())
-        ->getAddressSpace();
-    llvm_unreachable("Only a vector of pointers or pointers can be used!");
-    return 0;
+    return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -526,8 +519,9 @@ public:
   Value *getNewValOperand() { return getOperand(2); }
   const Value *getNewValOperand() const { return getOperand(2); }
 
+  /// \brief Returns the address space of the pointer operand.
   unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+    return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -669,8 +663,9 @@ public:
   Value *getValOperand() { return getOperand(1); }
   const Value *getValOperand() const { return getOperand(1); }
 
+  /// \brief Returns the address space of the pointer operand.
   unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+    return getPointerOperand()->getType()->getPointerAddressSpace();
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -771,6 +766,13 @@ public:
     return reinterpret_cast<PointerType*>(Instruction::getType());
   }
 
+  /// \brief Returns the address space of this instruction's pointer type.
+  unsigned getAddressSpace() const {
+    // Note that this is always the same as the pointer operand's address space
+    // and that is cheaper to compute, so cheat here.
+    return getPointerAddressSpace();
+  }
+
   /// getIndexedType - Returns the type of the element that would be loaded with
   /// a load instruction with the specified parameters.
   ///
@@ -781,10 +783,6 @@ public:
   static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
   static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
 
-  /// getAddressSpace - Returns the address space used by the GEP pointer.
-  ///
-  static unsigned getAddressSpace(Value *Ptr);
-
   inline op_iterator       idx_begin()       { return op_begin()+1; }
   inline const_op_iterator idx_begin() const { return op_begin()+1; }
   inline op_iterator       idx_end()         { return op_end(); }
@@ -800,22 +798,23 @@ public:
     return 0U;    // get index for modifying correct operand.
   }
 
-  unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getPointerOperandType())->getAddressSpace();
-  }
-
   /// getPointerOperandType - Method to return the pointer operand as a
   /// PointerType.
   Type *getPointerOperandType() const {
     return getPointerOperand()->getType();
   }
 
+  /// \brief Returns the address space of the pointer operand.
+  unsigned getPointerAddressSpace() const {
+    return getPointerOperandType()->getPointerAddressSpace();
+  }
+
   /// GetGEPReturnType - Returns the pointer type returned by the GEP
   /// instruction, which may be a vector of pointers.
   static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
     Type *PtrTy = PointerType::get(checkGEPType(
                                    getIndexedType(Ptr->getType(), IdxList)),
-                                   getAddressSpace(Ptr));
+                                   Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
       unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
@@ -899,13 +898,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
 /// This instruction compares its operands according to the predicate given
 /// to the constructor. It only operates on integers or pointers. The operands
 /// must be identical types.
-/// @brief Represent an integer comparison operator.
+/// \brief Represent an integer comparison operator.
 class ICmpInst: public CmpInst {
 protected:
-  /// @brief Clone an identical ICmpInst
+  /// \brief Clone an identical ICmpInst
   virtual ICmpInst *clone_impl() const;
 public:
-  /// @brief Constructor with insert-before-instruction semantics.
+  /// \brief Constructor with insert-before-instruction semantics.
   ICmpInst(
     Instruction *InsertBefore,  ///< Where to insert
     Predicate pred,  ///< The predicate to use for the comparison
@@ -926,7 +925,7 @@ public:
            "Invalid operand types for ICmp instruction");
   }
 
-  /// @brief Constructor with insert-at-end semantics.
+  /// \brief Constructor with insert-at-end semantics.
   ICmpInst(
     BasicBlock &InsertAtEnd, ///< Block to insert into.
     Predicate pred,  ///< The predicate to use for the comparison
@@ -947,7 +946,7 @@ public:
            "Invalid operand types for ICmp instruction");
   }
 
-  /// @brief Constructor with no-insertion semantics
+  /// \brief Constructor with no-insertion semantics
   ICmpInst(
     Predicate pred, ///< The predicate to use for the comparison
     Value *LHS,     ///< The left-hand-side of the expression
@@ -969,25 +968,25 @@ public:
   /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
   /// @returns the predicate that would be the result if the operand were
   /// regarded as signed.
-  /// @brief Return the signed version of the predicate
+  /// \brief Return the signed version of the predicate
   Predicate getSignedPredicate() const {
     return getSignedPredicate(getPredicate());
   }
 
   /// This is a static version that you can use without an instruction.
-  /// @brief Return the signed version of the predicate.
+  /// \brief Return the signed version of the predicate.
   static Predicate getSignedPredicate(Predicate pred);
 
   /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
   /// @returns the predicate that would be the result if the operand were
   /// regarded as unsigned.
-  /// @brief Return the unsigned version of the predicate
+  /// \brief Return the unsigned version of the predicate
   Predicate getUnsignedPredicate() const {
     return getUnsignedPredicate(getPredicate());
   }
 
   /// This is a static version that you can use without an instruction.
-  /// @brief Return the unsigned version of the predicate.
+  /// \brief Return the unsigned version of the predicate.
   static Predicate getUnsignedPredicate(Predicate pred);
 
   /// isEquality - Return true if this predicate is either EQ or NE.  This also
@@ -1003,7 +1002,7 @@ public:
   }
 
   /// @returns true if the predicate of this ICmpInst is commutative
-  /// @brief Determine if this relation is commutative.
+  /// \brief Determine if this relation is commutative.
   bool isCommutative() const { return isEquality(); }
 
   /// isRelational - Return true if the predicate is relational (not EQ or NE).
@@ -1019,14 +1018,14 @@ public:
   }
 
   /// Initialize a set of values that all satisfy the predicate with C.
-  /// @brief Make a ConstantRange for a relation with a constant value.
+  /// \brief Make a ConstantRange for a relation with a constant value.
   static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
 
   /// Exchange the two operands to this instruction in such a way that it does
   /// not modify the semantics of the instruction. The predicate value may be
   /// changed to retain the same result if the predicate is order dependent
   /// (e.g. ult).
-  /// @brief Swap operands and adjust predicate.
+  /// \brief Swap operands and adjust predicate.
   void swapOperands() {
     setPredicate(getSwappedPredicate());
     Op<0>().swap(Op<1>());
@@ -1049,13 +1048,13 @@ public:
 /// This instruction compares its operands according to the predicate given
 /// to the constructor. It only operates on floating point values or packed
 /// vectors of floating point values. The operands must be identical types.
-/// @brief Represents a floating point comparison operator.
+/// \brief Represents a floating point comparison operator.
 class FCmpInst: public CmpInst {
 protected:
-  /// @brief Clone an identical FCmpInst
+  /// \brief Clone an identical FCmpInst
   virtual FCmpInst *clone_impl() const;
 public:
-  /// @brief Constructor with insert-before-instruction semantics.
+  /// \brief Constructor with insert-before-instruction semantics.
   FCmpInst(
     Instruction *InsertBefore, ///< Where to insert
     Predicate pred,  ///< The predicate to use for the comparison
@@ -1074,7 +1073,7 @@ public:
            "Invalid operand types for FCmp instruction");
   }
 
-  /// @brief Constructor with insert-at-end semantics.
+  /// \brief Constructor with insert-at-end semantics.
   FCmpInst(
     BasicBlock &InsertAtEnd, ///< Block to insert into.
     Predicate pred,  ///< The predicate to use for the comparison
@@ -1093,7 +1092,7 @@ public:
           "Invalid operand types for FCmp instruction");
   }
 
-  /// @brief Constructor with no-insertion semantics
+  /// \brief Constructor with no-insertion semantics
   FCmpInst(
     Predicate pred, ///< The predicate to use for the comparison
     Value *LHS,     ///< The left-hand-side of the expression
@@ -1111,14 +1110,14 @@ public:
   }
 
   /// @returns true if the predicate of this instruction is EQ or NE.
-  /// @brief Determine if this is an equality predicate.
+  /// \brief Determine if this is an equality predicate.
bool isEquality() const { return getPredicate() == FCMP_OEQ || getPredicate() == FCMP_ONE || getPredicate() == FCMP_UEQ || getPredicate() == FCMP_UNE; } /// @returns true if the predicate of this instruction is commutative. - /// @brief Determine if this is a commutative predicate. + /// \brief Determine if this is a commutative predicate. bool isCommutative() const { return isEquality() || getPredicate() == FCMP_FALSE || @@ -1128,20 +1127,20 @@ public: } /// @returns true if the predicate is relational (not EQ or NE). - /// @brief Determine if this a relational predicate. + /// \brief Determine if this a relational predicate. bool isRelational() const { return !isEquality(); } /// Exchange the two operands to this instruction in such a way that it does /// not modify the semantics of the instruction. The predicate value may be /// changed to retain the same result if the predicate is order dependent /// (e.g. ult). - /// @brief Swap operands and adjust predicate. + /// \brief Swap operands and adjust predicate. void swapOperands() { setPredicate(getSwappedPredicate()); Op<0>().swap(Op<1>()); } - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; } @@ -1163,12 +1162,12 @@ class CallInst : public Instruction { void init(Value *Func, const Twine &NameStr); /// Construct a CallInst given a range of arguments. - /// @brief Construct a CallInst from a range of arguments + /// \brief Construct a CallInst from a range of arguments inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr, Instruction *InsertBefore); /// Construct a CallInst given a range of arguments. - /// @brief Construct a CallInst from a range of arguments + /// \brief Construct a CallInst from a range of arguments inline CallInst(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr, BasicBlock *InsertAtEnd); @@ -1267,25 +1266,25 @@ public: /// removeAttribute - removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attributes attr); - /// @brief Determine whether this call has the given attribute. + /// \brief Determine whether this call has the given attribute. bool hasFnAttr(Attributes::AttrVal A) const; - /// @brief Determine whether the call or the callee has the given attributes. + /// \brief Determine whether the call or the callee has the given attributes. bool paramHasAttr(unsigned i, Attributes::AttrVal A) const; - /// @brief Extract the alignment for a call or parameter (0=unknown). + /// \brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned i) const { return AttributeList.getParamAlignment(i); } - /// @brief Return true if the call should not be inlined. + /// \brief Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attributes::NoInline); } void setIsNoInline() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoInline)); } - /// @brief Return true if the call can return twice + /// \brief Return true if the call can return twice bool canReturnTwice() const { return hasFnAttr(Attributes::ReturnsTwice); } @@ -1294,7 +1293,7 @@ public: Attributes::get(getContext(), Attributes::ReturnsTwice)); } - /// @brief Determine if the call does not access memory. + /// \brief Determine if the call does not access memory. 
bool doesNotAccessMemory() const { return hasFnAttr(Attributes::ReadNone); } @@ -1303,7 +1302,7 @@ public: Attributes::get(getContext(), Attributes::ReadNone)); } - /// @brief Determine if the call does not access or only reads memory. + /// \brief Determine if the call does not access or only reads memory. bool onlyReadsMemory() const { return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly); } @@ -1312,28 +1311,28 @@ public: Attributes::get(getContext(), Attributes::ReadOnly)); } - /// @brief Determine if the call cannot return. + /// \brief Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); } void setDoesNotReturn() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoReturn)); } - /// @brief Determine if the call cannot unwind. + /// \brief Determine if the call cannot unwind. bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); } void setDoesNotThrow() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoUnwind)); } - /// @brief Determine if the call returns a structure through first + /// \brief Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { // Be friendly and also check the callee. return paramHasAttr(1, Attributes::StructRet); } - /// @brief Determine if any call argument is an aggregate passed by value. + /// \brief Determine if any call argument is an aggregate passed by value. bool hasByValArgument() const { for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I) if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal)) @@ -2950,14 +2949,14 @@ class InvokeInst : public TerminatorInst { /// Construct an InvokeInst given a range of arguments. /// - /// @brief Construct an InvokeInst from a range of arguments + /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef<Value *> Args, unsigned Values, const Twine &NameStr, Instruction *InsertBefore); /// Construct an InvokeInst given a range of arguments. /// - /// @brief Construct an InvokeInst from a range of arguments + /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef<Value *> Args, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd); @@ -3016,25 +3015,25 @@ public: /// removeAttribute - removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attributes attr); - /// @brief Determine whether this call has the NoAlias attribute. + /// \brief Determine whether this call has the NoAlias attribute. bool hasFnAttr(Attributes::AttrVal A) const; - /// @brief Determine whether the call or the callee has the given attributes. + /// \brief Determine whether the call or the callee has the given attributes. bool paramHasAttr(unsigned i, Attributes::AttrVal A) const; - /// @brief Extract the alignment for a call or parameter (0=unknown). + /// \brief Extract the alignment for a call or parameter (0=unknown). unsigned getParamAlignment(unsigned i) const { return AttributeList.getParamAlignment(i); } - /// @brief Return true if the call should not be inlined. + /// \brief Return true if the call should not be inlined. 
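// Sketch (assumes an existing CallInst *Call): the attribute accessors being
// re-documented above, in use. ReadNone implies ReadOnly, which is why
// onlyReadsMemory() also checks doesNotAccessMemory().

#include "llvm/Instructions.h"

static void summarizeCall(llvm::CallInst *Call) {
  bool Pure = Call->doesNotAccessMemory();   // Attributes::ReadNone
  bool ReadOnly = Call->onlyReadsMemory();   // ReadNone or ReadOnly
  if (Pure && !Call->doesNotThrow())
    Call->setDoesNotThrow();                 // adds Attributes::NoUnwind
  (void)ReadOnly;
}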
bool isNoInline() const { return hasFnAttr(Attributes::NoInline); } void setIsNoInline() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoInline)); } - /// @brief Determine if the call does not access memory. + /// \brief Determine if the call does not access memory. bool doesNotAccessMemory() const { return hasFnAttr(Attributes::ReadNone); } @@ -3043,7 +3042,7 @@ public: Attributes::get(getContext(), Attributes::ReadNone)); } - /// @brief Determine if the call does not access or only reads memory. + /// \brief Determine if the call does not access or only reads memory. bool onlyReadsMemory() const { return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly); } @@ -3052,28 +3051,28 @@ public: Attributes::get(getContext(), Attributes::ReadOnly)); } - /// @brief Determine if the call cannot return. + /// \brief Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); } void setDoesNotReturn() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoReturn)); } - /// @brief Determine if the call cannot unwind. + /// \brief Determine if the call cannot unwind. bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); } void setDoesNotThrow() { addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), Attributes::NoUnwind)); } - /// @brief Determine if the call returns a structure through first + /// \brief Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { // Be friendly and also check the callee. return paramHasAttr(1, Attributes::StructRet); } - /// @brief Determine if any call argument is an aggregate passed by value. + /// \brief Determine if any call argument is an aggregate passed by value. bool hasByValArgument() const { for (unsigned I = 0, E = AttributeList.getNumAttrs(); I != E; ++I) if (AttributeList.getAttributesAtIndex(I).hasAttribute(Attributes::ByVal)) @@ -3268,14 +3267,14 @@ private: // TruncInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a truncation of integer types. +/// \brief This class represents a truncation of integer types. class TruncInst : public CastInst { protected: - /// @brief Clone an identical TruncInst + /// \brief Clone an identical TruncInst virtual TruncInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics TruncInst( Value *S, ///< The value to be truncated Type *Ty, ///< The (smaller) type to truncate to @@ -3283,7 +3282,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics TruncInst( Value *S, ///< The value to be truncated Type *Ty, ///< The (smaller) type to truncate to @@ -3291,7 +3290,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Trunc; } @@ -3304,14 +3303,14 @@ public: // ZExtInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents zero extension of integer types. 
+/// \brief This class represents zero extension of integer types. class ZExtInst : public CastInst { protected: - /// @brief Clone an identical ZExtInst + /// \brief Clone an identical ZExtInst virtual ZExtInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics ZExtInst( Value *S, ///< The value to be zero extended Type *Ty, ///< The type to zero extend to @@ -3319,7 +3318,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end semantics. + /// \brief Constructor with insert-at-end semantics. ZExtInst( Value *S, ///< The value to be zero extended Type *Ty, ///< The type to zero extend to @@ -3327,7 +3326,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == ZExt; } @@ -3340,14 +3339,14 @@ public: // SExtInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a sign extension of integer types. +/// \brief This class represents a sign extension of integer types. class SExtInst : public CastInst { protected: - /// @brief Clone an identical SExtInst + /// \brief Clone an identical SExtInst virtual SExtInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics SExtInst( Value *S, ///< The value to be sign extended Type *Ty, ///< The type to sign extend to @@ -3355,7 +3354,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics SExtInst( Value *S, ///< The value to be sign extended Type *Ty, ///< The type to sign extend to @@ -3363,7 +3362,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == SExt; } @@ -3376,14 +3375,14 @@ public: // FPTruncInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a truncation of floating point types. +/// \brief This class represents a truncation of floating point types. 
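// Sketch: every CastInst subclass in this stretch shares the same two
// constructor shapes (insert-before-instruction and insert-at-end-of-block).
// A hypothetical helper using the insert-before flavor:

#include "llvm/Instructions.h"
#include "llvm/Type.h"

static llvm::Value *widenToI32(llvm::Value *V, llvm::Instruction *InsertPt) {
  llvm::Type *I32 = llvm::Type::getInt32Ty(V->getContext());
  // The new zext is inserted immediately before InsertPt.
  return new llvm::ZExtInst(V, I32, "widened", InsertPt);
}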
class FPTruncInst : public CastInst { protected: - /// @brief Clone an identical FPTruncInst + /// \brief Clone an identical FPTruncInst virtual FPTruncInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics FPTruncInst( Value *S, ///< The value to be truncated Type *Ty, ///< The type to truncate to @@ -3391,7 +3390,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics FPTruncInst( Value *S, ///< The value to be truncated Type *Ty, ///< The type to truncate to @@ -3399,7 +3398,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == FPTrunc; } @@ -3412,14 +3411,14 @@ public: // FPExtInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents an extension of floating point types. +/// \brief This class represents an extension of floating point types. class FPExtInst : public CastInst { protected: - /// @brief Clone an identical FPExtInst + /// \brief Clone an identical FPExtInst virtual FPExtInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics FPExtInst( Value *S, ///< The value to be extended Type *Ty, ///< The type to extend to @@ -3427,7 +3426,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics FPExtInst( Value *S, ///< The value to be extended Type *Ty, ///< The type to extend to @@ -3435,7 +3434,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == FPExt; } @@ -3448,14 +3447,14 @@ public: // UIToFPInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast unsigned integer to floating point. +/// \brief This class represents a cast unsigned integer to floating point. 
class UIToFPInst : public CastInst { protected: - /// @brief Clone an identical UIToFPInst + /// \brief Clone an identical UIToFPInst virtual UIToFPInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics UIToFPInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3463,7 +3462,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics UIToFPInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3471,7 +3470,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == UIToFP; } @@ -3484,14 +3483,14 @@ public: // SIToFPInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast from signed integer to floating point. +/// \brief This class represents a cast from signed integer to floating point. class SIToFPInst : public CastInst { protected: - /// @brief Clone an identical SIToFPInst + /// \brief Clone an identical SIToFPInst virtual SIToFPInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics SIToFPInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3499,7 +3498,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics SIToFPInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3507,7 +3506,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == SIToFP; } @@ -3520,14 +3519,14 @@ public: // FPToUIInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast from floating point to unsigned integer +/// \brief This class represents a cast from floating point to unsigned integer class FPToUIInst : public CastInst { protected: - /// @brief Clone an identical FPToUIInst + /// \brief Clone an identical FPToUIInst virtual FPToUIInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics FPToUIInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3535,7 +3534,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics FPToUIInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3543,7 +3542,7 @@ public: BasicBlock *InsertAtEnd ///< Where to insert the new instruction 
); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == FPToUI; } @@ -3556,14 +3555,14 @@ public: // FPToSIInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast from floating point to signed integer. +/// \brief This class represents a cast from floating point to signed integer. class FPToSIInst : public CastInst { protected: - /// @brief Clone an identical FPToSIInst + /// \brief Clone an identical FPToSIInst virtual FPToSIInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics FPToSIInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3571,7 +3570,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics FPToSIInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3579,7 +3578,7 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + /// \brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == FPToSI; } @@ -3592,10 +3591,10 @@ public: // IntToPtrInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast from an integer to a pointer. +/// \brief This class represents a cast from an integer to a pointer. class IntToPtrInst : public CastInst { public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics IntToPtrInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3603,7 +3602,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics IntToPtrInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3611,20 +3610,12 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief Clone an identical IntToPtrInst + /// \brief Clone an identical IntToPtrInst virtual IntToPtrInst *clone_impl() const; - /// @brief return the address space of the pointer. + /// \brief Returns the address space of this instruction's pointer type. 
unsigned getAddressSpace() const { - if (getType()->isPointerTy()) - return cast<PointerType>(getType())->getAddressSpace(); - if (getType()->isVectorTy() && - cast<VectorType>(getType())->getElementType()->isPointerTy()) - return cast<PointerType>( - cast<VectorType>(getType())->getElementType()) - ->getAddressSpace(); - llvm_unreachable("Must be a pointer or a vector of pointers."); - return 0; + return getType()->getPointerAddressSpace(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3640,14 +3631,14 @@ public: // PtrToIntInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a cast from a pointer to an integer +/// \brief This class represents a cast from a pointer to an integer class PtrToIntInst : public CastInst { protected: - /// @brief Clone an identical PtrToIntInst + /// \brief Clone an identical PtrToIntInst virtual PtrToIntInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics PtrToIntInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3655,7 +3646,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics PtrToIntInst( Value *S, ///< The value to be converted Type *Ty, ///< The type to convert to @@ -3663,18 +3654,16 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// @brief return the address space of the pointer. + /// \brief Gets the pointer operand. + Value *getPointerOperand() { return getOperand(0); } + /// \brief Gets the pointer operand. + const Value *getPointerOperand() const { return getOperand(0); } + /// \brief Gets the operand index of the pointer operand. + static unsigned getPointerOperandIndex() { return 0U; } + + /// \brief Returns the address space of the pointer operand. unsigned getPointerAddressSpace() const { - Type *Ty = getOperand(0)->getType(); - if (Ty->isPointerTy()) - return cast<PointerType>(Ty)->getAddressSpace(); - if (Ty->isVectorTy() - && cast<VectorType>(Ty)->getElementType()->isPointerTy()) - return cast<PointerType>( - cast<VectorType>(Ty)->getElementType()) - ->getAddressSpace(); - llvm_unreachable("Must be a pointer or a vector of pointers."); - return 0; + return getPointerOperand()->getType()->getPointerAddressSpace(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3690,14 +3679,14 @@ public: // BitCastInst Class //===----------------------------------------------------------------------===// -/// @brief This class represents a no-op cast from one type to another. +/// \brief This class represents a no-op cast from one type to another. 
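// Sketch (assumes an existing PtrToIntInst *PTI): the accessors added above
// mirror LoadInst/StoreInst, and the address-space query now also handles
// vectors of pointers via Type::getPointerAddressSpace().

#include "llvm/Instructions.h"

static unsigned ptrToIntAddrSpace(const llvm::PtrToIntInst *PTI) {
  const llvm::Value *Ptr = PTI->getPointerOperand(); // operand 0
  (void)Ptr;
  return PTI->getPointerAddressSpace(); // e.g. <2 x i8 addrspace(3)*> -> 3
}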
class BitCastInst : public CastInst { protected: - /// @brief Clone an identical BitCastInst + /// \brief Clone an identical BitCastInst virtual BitCastInst *clone_impl() const; public: - /// @brief Constructor with insert-before-instruction semantics + /// \brief Constructor with insert-before-instruction semantics BitCastInst( Value *S, ///< The value to be casted Type *Ty, ///< The type to casted to @@ -3705,7 +3694,7 @@ public: Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); - /// @brief Constructor with insert-at-end-of-block semantics + /// \brief Constructor with insert-at-end-of-block semantics BitCastInst( Value *S, ///< The value to be casted Type *Ty, ///< The type to casted to diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h index a31220355f..9b2afd56e0 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IntrinsicInst.h @@ -268,6 +268,49 @@ namespace llvm { } }; + /// VAStartInst - This represents the llvm.va_start intrinsic. + /// + class VAStartInst : public IntrinsicInst { + public: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::vastart; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + + Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); } + }; + + /// VAEndInst - This represents the llvm.va_end intrinsic. + /// + class VAEndInst : public IntrinsicInst { + public: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::vaend; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + + Value *getArgList() const { return const_cast<Value*>(getArgOperand(0)); } + }; + + /// VACopyInst - This represents the llvm.va_copy intrinsic. 
+ /// + class VACopyInst : public IntrinsicInst { + public: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::vacopy; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + + Value *getDest() const { return const_cast<Value*>(getArgOperand(0)); } + Value *getSrc() const { return const_cast<Value*>(getArgOperand(1)); } + }; + } #endif diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 8652acd941..806e4b37b7 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -60,6 +60,7 @@ namespace { (void) llvm::createCFGSimplificationPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); + (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDeadInstEliminationPass(); @@ -82,11 +83,10 @@ namespace { (void) llvm::createIPSCCPPass(); (void) llvm::createIndVarSimplifyPass(); (void) llvm::createInstructionCombiningPass(); - (void) llvm::createInternalizePass(false); + (void) llvm::createInternalizePass(); (void) llvm::createLCSSAPass(); (void) llvm::createLICMPass(); (void) llvm::createLazyValueInfoPass(); - (void) llvm::createLoopDependenceAnalysisPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopStrengthReducePass(); diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 59a2501b8e..8b0f191792 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -102,9 +102,9 @@ public: /// @name Accessors /// @{ - uint8_t getOSABI() { return OSABI; } - uint16_t getEMachine() { return EMachine; } - bool hasRelocationAddend() { return HasRelocationAddend; } + uint8_t getOSABI() const { return OSABI; } + uint16_t getEMachine() const { return EMachine; } + bool hasRelocationAddend() const { return HasRelocationAddend; } bool is64Bit() const { return Is64Bit; } bool isN64() const { return IsN64; } /// @} diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 8a5f37cb0c..a71d3c3217 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -37,6 +37,8 @@ public: virtual ~MCAsmParserSemaCallback(); virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc, unsigned &Size) = 0; + virtual bool LookupInlineAsmField(StringRef Base, StringRef Member, + unsigned &Offset) = 0; }; /// MCAsmParser - Generic assembler parser interface, for use by target specific diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h index 89b0a1f47b..60e7887a53 100644 --- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h +++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h @@ -64,6 +64,12 @@ public: /// getEndLoc - Get the location of the last token of this operand. virtual SMLoc getEndLoc() const = 0; + /// needAsmRewrite - AsmRewrites happen in both the target-independent and + /// target-dependent parsers. The target-independent parser calls this + /// function to determine if the target-dependent parser has already taken + /// care of the rewrites. Only valid when parsing MS-style inline assembly. 
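// Sketch: the point of the new wrapper classes is dyn_cast-based matching
// instead of comparing intrinsic IDs by hand. Illustrative helper:

#include "llvm/IntrinsicInst.h"
#include "llvm/Support/Casting.h"

static llvm::Value *vaListOperand(llvm::Instruction *I) {
  using namespace llvm;
  if (VAStartInst *Start = dyn_cast<VAStartInst>(I))
    return Start->getArgList();
  if (VACopyInst *Copy = dyn_cast<VACopyInst>(I))
    return Copy->getDest(); // getSrc() names the source va_list
  return 0;
}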
+ virtual bool needAsmRewrite() const { return true; } + /// isOffsetOf - Do we need to emit code to get the offset of the variable, /// rather then the value of the variable? Only valid when parsing MS-style /// inline assembly. diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 6749bdffc2..f05baeaaf6 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -370,7 +370,7 @@ public: /// getRegClass - Returns the register class associated with the enumeration /// value. See class MCOperandInfo. - const MCRegisterClass getRegClass(unsigned i) const { + const MCRegisterClass& getRegClass(unsigned i) const { assert(i < getNumRegClasses() && "Register Class ID out of range"); return Classes[i]; } diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h index 05537f9211..483a80b3b5 100644 --- a/include/llvm/MC/MCTargetAsmParser.h +++ b/include/llvm/MC/MCTargetAsmParser.h @@ -21,6 +21,39 @@ class MCParsedAsmOperand; class MCInst; template <typename T> class SmallVectorImpl; +enum AsmRewriteKind { + AOK_DotOperator, // Rewrite a dot operator expression as an immediate. + // E.g., [eax].foo.bar -> [eax].8 + AOK_Emit, // Rewrite _emit as .byte. + AOK_Imm, // Rewrite as $$N. + AOK_ImmPrefix, // Add $$ before a parsed Imm. + AOK_Input, // Rewrite in terms of $N. + AOK_Output, // Rewrite in terms of $N. + AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). + AOK_Skip // Skip emission (e.g., offset/type operators). +}; + +struct AsmRewrite { + AsmRewriteKind Kind; + SMLoc Loc; + unsigned Len; + unsigned Val; +public: + AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, unsigned val = 0) + : Kind(kind), Loc(loc), Len(len), Val(val) {} +}; + +struct ParseInstructionInfo { + + SmallVectorImpl<AsmRewrite> *AsmRewrites; + + ParseInstructionInfo() : AsmRewrites(0) {} + ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) + : AsmRewrites(rewrites) {} + + ~ParseInstructionInfo() {} +}; + /// MCTargetAsmParser - Generic interface to target specific assembly parsers. class MCTargetAsmParser : public MCAsmParserExtension { public: @@ -77,7 +110,8 @@ public: /// \param Operands [out] - The list of parsed operands, this returns /// ownership of them to the caller. /// \return True on failure. 
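// Sketch of the intended calling convention for the new structs (assumes the
// caller is the target-independent asm parser; names here are local):

#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/ADT/SmallVector.h"

static void threadRewrites() {
  llvm::SmallVector<llvm::AsmRewrite, 4> Rewrites;
  llvm::ParseInstructionInfo Info(&Rewrites);
  // Info is then handed to ParseInstruction(); a target parser appends
  // entries such as AsmRewrite(llvm::AOK_SizeDirective, Loc, Len, Size).
}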
- virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0; /// ParseDirective - Parse a target specific assembler directive diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index d6b92ed021..6f42d76ee9 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -116,6 +116,7 @@ protected: virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; virtual error_code getSymbolSection(DataRefImpl Symb, section_iterator &Res) const; + virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const; virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 204348c0c5..466de93a78 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -620,6 +620,7 @@ protected: virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; virtual error_code getSymbolSection(DataRefImpl Symb, section_iterator &Res) const; + virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const; friend class DynRefImpl<target_endianness, is64Bits>; virtual error_code getDynNext(DataRefImpl DynData, DynRef &Result) const; @@ -1162,6 +1163,16 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSymbolValue(DataRefImpl Symb, + uint64_t &Val) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + Val = symb->st_value; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const { const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p); sec += Header->e_shentsize; diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 97cd4191aa..4e03daab16 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -61,6 +61,7 @@ protected: virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const; virtual error_code getSymbolSection(DataRefImpl Symb, section_iterator &Res) const; + virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const; virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const; virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const; diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 4185ffea13..1a3120ab8b 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -234,6 +234,9 @@ public: /// end_sections() if it is undefined or is an absolute symbol. error_code getSection(section_iterator &Result) const; + /// @brief Get value of the symbol in the symbol table. + error_code getValue(uint64_t &Val) const; + DataRefImpl getRawDataRefImpl() const; }; typedef content_iterator<SymbolRef> symbol_iterator; @@ -300,6 +303,7 @@ protected: uint32_t &Res) const = 0; virtual error_code getSymbolSection(DataRefImpl Symb, section_iterator &Res) const = 0; + virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const = 0; // Same as above for SectionRef. 
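// Sketch: the new SymbolRef::getValue() in use (assumes an already-created
// ObjectFile *Obj; error handling abbreviated):

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/raw_ostream.h"

static void dumpSymbolValues(llvm::object::ObjectFile *Obj) {
  using namespace llvm;
  using namespace llvm::object;
  error_code EC;
  for (symbol_iterator I = Obj->begin_symbols(), E = Obj->end_symbols();
       I != E; I.increment(EC)) {
    uint64_t Val;
    if (!I->getValue(Val))
      outs() << Val << "\n"; // e.g. st_value for ELF symbols
  }
}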
friend class SectionRef; @@ -444,6 +448,10 @@ inline error_code SymbolRef::getType(SymbolRef::Type &Result) const { return OwningObject->getSymbolType(SymbolPimpl, Result); } +inline error_code SymbolRef::getValue(uint64_t &Val) const { + return OwningObject->getSymbolValue(SymbolPimpl, Val); +} + inline DataRefImpl SymbolRef::getRawDataRefImpl() const { return SymbolPimpl; } diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index 22c07d04fa..d6b0ab8b37 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -78,7 +78,7 @@ template <size_t Alignment> struct AlignedCharArrayImpl; template <> struct AlignedCharArrayImpl<x> { \ char alignas(x) aligned; \ } -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__IBM_ATTRIBUTES) #define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \ template <> struct AlignedCharArrayImpl<x> { \ char aligned __attribute__((aligned(x))); \ diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 8f7d879f2e..872c57998c 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -1507,7 +1507,7 @@ class bits : public Option, public bits_storage<DataType, Storage> { typename ParserClass::parser_data_type(); if (Parser.parse(*this, ArgName, Arg, Val)) return true; // Parse Error! - addValue(Val); + this->addValue(Val); setPosition(pos); Positions.push_back(pos); return false; diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index dbd1091629..a67a6ac09e 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -690,8 +690,36 @@ enum { R_MIPS_NUM = 218 }; +// Hexagon Specific e_flags +// Release 5 ABI +enum { + // Object processor version flags, bits[3:0] + EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 + EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 + EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 + EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 + + // Highest ISA version flags + EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[3:0] + // of e_flags + EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA + EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA + EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA + EF_HEXAGON_ISA_V5 = 0x00000040 // Hexagon V5 ISA +}; + +// Hexagon specific Section indexes for common small data +// Release 5 ABI +enum { + SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes + SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access + SHN_HEXAGON_SCOMMON_2 = 0xff02, // Half-word-sized access + SHN_HEXAGON_SCOMMON_4 = 0xff03, // Word-sized access + SHN_HEXAGON_SCOMMON_8 = 0xff04 // Double-word-size access +}; + // ELF Relocation types for Hexagon -// Release 5 ABI - Document: 80-V9418-3 Rev. J +// Release 5 ABI enum { R_HEX_NONE = 0, R_HEX_B22_PCREL = 1, @@ -1119,6 +1147,9 @@ enum { PT_PHDR = 6, // The program header table itself. PT_TLS = 7, // The thread-local storage template. PT_LOOS = 0x60000000, // Lowest operating system-specific pt entry type. + PT_HIOS = 0x6fffffff, // Highest operating system-specific pt entry type. + PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type. + PT_HIPROC = 0x7fffffff, // Highest processor-specific program hdr entry type. // x86-64 program header types. // These all contain stack unwind tables. @@ -1129,9 +1160,11 @@ enum { PT_GNU_STACK = 0x6474e551, // Indicates stack executability. PT_GNU_RELRO = 0x6474e552, // Read-only after relocation. - PT_HIOS = 0x6fffffff, // Highest operating system-specific pt entry type. 
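// Sketch: decoding the new Hexagon e_flags; the processor version occupies
// bits[3:0] of e_flags per the comment above (the mask value is an
// assumption drawn from that layout):

#include "llvm/Support/ELF.h"

static bool isHexagonV4(unsigned EFlags) {
  return (EFlags & 0xF) == llvm::ELF::EF_HEXAGON_MACH_V4; // 0x00000003
}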
- PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type. - PT_HIPROC = 0x7fffffff // Highest processor-specific program hdr entry type. + // ARM program header types. + PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility information + // These all contain stack unwind tables. + PT_ARM_EXIDX = 0x70000001, + PT_ARM_UNWIND = 0x70000001 }; // Segment flag bits. diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h index 109b3cff85..6dfb4dec0e 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/Support/InstVisitor.h @@ -209,6 +209,9 @@ public: RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); } RetTy visitMemTransferInst(MemTransferInst &I) { DELEGATE(MemIntrinsic); } RetTy visitMemIntrinsic(MemIntrinsic &I) { DELEGATE(IntrinsicInst); } + RetTy visitVAStartInst(VAStartInst &I) { DELEGATE(IntrinsicInst); } + RetTy visitVAEndInst(VAEndInst &I) { DELEGATE(IntrinsicInst); } + RetTy visitVACopyInst(VACopyInst &I) { DELEGATE(IntrinsicInst); } RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); } // Call and Invoke are slightly different as they delegate first through @@ -262,6 +265,9 @@ private: case Intrinsic::memcpy: DELEGATE(MemCpyInst); case Intrinsic::memmove: DELEGATE(MemMoveInst); case Intrinsic::memset: DELEGATE(MemSetInst); + case Intrinsic::vastart: DELEGATE(VAStartInst); + case Intrinsic::vaend: DELEGATE(VAEndInst); + case Intrinsic::vacopy: DELEGATE(VACopyInst); case Intrinsic::not_intrinsic: break; } } diff --git a/include/llvm/Support/IntegersSubset.h b/include/llvm/Support/IntegersSubset.h index bb9e76925e..03039fd645 100644 --- a/include/llvm/Support/IntegersSubset.h +++ b/include/llvm/Support/IntegersSubset.h @@ -411,8 +411,8 @@ public: unsigned getSize() const { APInt sz(((const APInt&)getItem(0).getLow()).getBitWidth(), 0); for (unsigned i = 0, e = getNumItems(); i != e; ++i) { - const APInt &Low = getItem(i).getLow(); - const APInt &High = getItem(i).getHigh(); + const APInt Low = getItem(i).getLow(); + const APInt High = getItem(i).getHigh(); APInt S = High - Low + 1; sz += S; } @@ -426,8 +426,8 @@ public: APInt getSingleValue(unsigned idx) const { APInt sz(((const APInt&)getItem(0).getLow()).getBitWidth(), 0); for (unsigned i = 0, e = getNumItems(); i != e; ++i) { - const APInt &Low = getItem(i).getLow(); - const APInt &High = getItem(i).getHigh(); + const APInt Low = getItem(i).getLow(); + const APInt High = getItem(i).getHigh(); APInt S = High - Low + 1; APInt oldSz = sz; sz += S; diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index 3f7b7f4e8c..2f6b7e625c 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -19,26 +19,13 @@ namespace llvm { -class TGError { - SmallVector<SMLoc, 4> Locs; - std::string Message; -public: - TGError(ArrayRef<SMLoc> locs, const std::string &message) - : Locs(locs.begin(), locs.end()), Message(message) {} - - ArrayRef<SMLoc> getLoc() const { return Locs; } - const std::string &getMessage() const { return Message; } -}; - void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg); void PrintWarning(const char *Loc, const Twine &Msg); void PrintWarning(const Twine &Msg); -void PrintWarning(const TGError &Warning); void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); void PrintError(const char *Loc, const Twine &Msg); void PrintError(const Twine &Msg); -void PrintError(const TGError &Error); LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const std::string &Msg); 
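// Sketch: with the delegation added to InstVisitor above, a visitor can hook
// the va_* intrinsics directly. Minimal illustrative visitor:

#include "llvm/Support/InstVisitor.h"

namespace {
struct VAStartCounter : public llvm::InstVisitor<VAStartCounter> {
  unsigned Count;
  VAStartCounter() : Count(0) {}
  void visitVAStartInst(llvm::VAStartInst &I) { ++Count; }
};
} // run with: VAStartCounter VC; VC.visit(F);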
LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index 8030d38d73..2160e371bd 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -140,9 +140,19 @@ namespace ISD { /// IsFixed - Is this a "fixed" value, ie not passed through a vararg "...". bool IsFixed; + /// Index original Function's argument. + unsigned OrigArgIndex; + + /// Offset in bytes of current output value relative to the beginning of + /// original argument. E.g. if argument was splitted into four 32 bit + /// registers, we got 4 OutputArgs with PartOffsets 0, 4, 8 and 12. + unsigned PartOffset; + OutputArg() : IsFixed(false) {} - OutputArg(ArgFlagsTy flags, EVT vt, bool isfixed) - : Flags(flags), IsFixed(isfixed) { + OutputArg(ArgFlagsTy flags, EVT vt, bool isfixed, + unsigned origIdx, unsigned partOffs) + : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), + PartOffset(partOffs) { VT = vt.getSimpleVT(); } }; diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h deleted file mode 100644 index 5e48629cf4..0000000000 --- a/include/llvm/Target/TargetELFWriterInfo.h +++ /dev/null @@ -1,121 +0,0 @@ -//===-- llvm/Target/TargetELFWriterInfo.h - ELF Writer Info -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the TargetELFWriterInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_TARGETELFWRITERINFO_H -#define LLVM_TARGET_TARGETELFWRITERINFO_H - -namespace llvm { - - //===--------------------------------------------------------------------===// - // TargetELFWriterInfo - //===--------------------------------------------------------------------===// - - class TargetELFWriterInfo { - protected: - // EMachine - This field is the target specific value to emit as the - // e_machine member of the ELF header. - unsigned short EMachine; - bool is64Bit, isLittleEndian; - public: - - // Machine architectures - enum MachineType { - EM_NONE = 0, // No machine - EM_M32 = 1, // AT&T WE 32100 - EM_SPARC = 2, // SPARC - EM_386 = 3, // Intel 386 - EM_68K = 4, // Motorola 68000 - EM_88K = 5, // Motorola 88000 - EM_486 = 6, // Intel 486 (deprecated) - EM_860 = 7, // Intel 80860 - EM_MIPS = 8, // MIPS R3000 - EM_PPC = 20, // PowerPC - EM_ARM = 40, // ARM - EM_ALPHA = 41, // DEC Alpha - EM_SPARCV9 = 43, // SPARC V9 - EM_X86_64 = 62, // AMD64 - EM_HEXAGON = 164 // Qualcomm Hexagon - }; - - // ELF File classes - enum { - ELFCLASS32 = 1, // 32-bit object file - ELFCLASS64 = 2 // 64-bit object file - }; - - // ELF Endianess - enum { - ELFDATA2LSB = 1, // Little-endian object file - ELFDATA2MSB = 2 // Big-endian object file - }; - - explicit TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_); - virtual ~TargetELFWriterInfo(); - - unsigned short getEMachine() const { return EMachine; } - unsigned getEFlags() const { return 0; } - unsigned getEIClass() const { return is64Bit ? ELFCLASS64 : ELFCLASS32; } - unsigned getEIData() const { - return isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB; - } - - /// ELF Header and ELF Section Header Info - unsigned getHdrSize() const { return is64Bit ? 
64 : 52; } - unsigned getSHdrSize() const { return is64Bit ? 64 : 40; } - - /// Symbol Table Info - unsigned getSymTabEntrySize() const { return is64Bit ? 24 : 16; } - - /// getPrefELFAlignment - Returns the preferred alignment for ELF. This - /// is used to align some sections. - unsigned getPrefELFAlignment() const { return is64Bit ? 8 : 4; } - - /// getRelocationEntrySize - Entry size used in the relocation section - unsigned getRelocationEntrySize() const { - return is64Bit ? (hasRelocationAddend() ? 24 : 16) - : (hasRelocationAddend() ? 12 : 8); - } - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const = 0; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const = 0; - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const = 0; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const = 0; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const = 0; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const = 0; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const = 0; - }; - -} // end llvm namespace - -#endif // LLVM_TARGET_TARGETELFWRITERINFO_H diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 830e2d645a..f8925f25a1 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -423,6 +423,13 @@ public: getOperationAction(Op, VT) == Custom); } + /// isOperationExpand - Return true if the specified operation is illegal on + /// this target or unlikely to be made legal with custom lowering. This is + /// used to help guide high-level lowering decisions. + bool isOperationExpand(unsigned Op, EVT VT) const { + return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); + } + /// isOperationLegal - Return true if the specified operation is legal on this /// target. bool isOperationLegal(unsigned Op, EVT VT) const { @@ -1269,7 +1276,7 @@ protected: public: //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that - // the SelectionDAGLowering code knows how to lower these. + // the SelectionDAGBuilder code knows how to lower these. 
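// Sketch of a client of the new isOperationExpand() hook (assumes a
// TargetLowering reference and a value type of interest):

#include "llvm/Target/TargetLowering.h"

static bool vectorStoreIsCheap(const llvm::TargetLowering &TLI, llvm::EVT VT) {
  // Expand means illegal or unlikely to get custom lowering, so steer away.
  return !TLI.isOperationExpand(llvm::ISD::STORE, VT);
}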
// /// LowerFormalArguments - This hook must be implemented to lower the @@ -1985,6 +1992,9 @@ public: ValueTypeActions.getTypeAction(NVT.getSimpleVT()) != TypePromoteInteger) && "Promote may not follow Expand or Promote"); + if (LA == TypeSplitVector) + NVT = EVT::getVectorVT(Context, VT.getVectorElementType(), + VT.getVectorNumElements() / 2); return LegalizeKind(LA, NVT); } diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 18e589e2bc..50066473b5 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -34,7 +34,6 @@ class MCContext; class PassManagerBase; class Target; class DataLayout; -class TargetELFWriterInfo; class TargetFrameLowering; class TargetInstrInfo; class TargetIntrinsicInfo; @@ -148,11 +147,6 @@ public: return 0; } - /// getELFWriterInfo - If this target supports an ELF writer, return - /// information for it, otherwise return null. - /// - virtual const TargetELFWriterInfo *getELFWriterInfo() const { return 0; } - /// hasMCRelaxAll - Check whether all machine code instructions should be /// relaxed. bool hasMCRelaxAll() const { return MCRelaxAll; } diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 942ee44827..afa2ee2744 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -446,18 +446,6 @@ public: return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC); } - /// canCombineSubRegIndices - Given a register class and a list of - /// subregister indices, return true if it's possible to combine the - /// subregister indices into one that corresponds to a larger - /// subregister. Return the new subregister index by reference. Note the - /// new index may be zero if the given subregisters can be combined to - /// form the whole register. - virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const { - return 0; - } - /// getMatchingSuperRegClass - Return a subclass of the specified register /// class A so that each register in it has a sub-register of the /// specified sub-register index which is in the specified register class B. @@ -488,6 +476,8 @@ public: /// composeSubRegIndices - Return the subregister index you get from composing /// two subregister indices. /// + /// The special null sub-register index composes as the identity. + /// /// If R:a:b is the same register as R:c, then composeSubRegIndices(a, b) /// returns c. Note that composeSubRegIndices does not tell you about illegal /// compositions. If R does not have a subreg a, or R:a does not have a subreg @@ -497,11 +487,19 @@ public: /// ssub_0:S0 - ssub_3:S3 subregs. /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2. /// - virtual unsigned composeSubRegIndices(unsigned a, unsigned b) const { - // This default implementation is correct for most targets. - return b; + unsigned composeSubRegIndices(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return composeSubRegIndicesImpl(a, b); } +protected: + /// Overridden by TableGen in targets that have sub-registers. + virtual unsigned composeSubRegIndicesImpl(unsigned, unsigned) const { + llvm_unreachable("Target has no sub-registers"); + } + +public: /// getCommonSuperRegClass - Find a common super-register class if it exists. 
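// Sketch: the contract of the reworked composeSubRegIndices(). The null
// index now composes as the identity, and targets override only the
// protected Impl hook:

#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>

static void checkNullIdentity(const llvm::TargetRegisterInfo &TRI,
                              unsigned Idx) {
  assert(TRI.composeSubRegIndices(0, Idx) == Idx &&
         TRI.composeSubRegIndices(Idx, 0) == Idx &&
         "null sub-register index must compose as the identity");
}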
/// /// Find a register class, SuperRC and two sub-register indices, PreA and diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h index 25a7edeb01..625be7208a 100644 --- a/include/llvm/Target/TargetTransformImpl.h +++ b/include/llvm/Target/TargetTransformImpl.h @@ -46,28 +46,49 @@ public: virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; + + virtual bool shouldBuildLookupTables() const; }; class VectorTargetTransformImpl : public VectorTargetTransformInfo { -private: +protected: const TargetLowering *TLI; /// Estimate the cost of type-legalization and the legalized type. std::pair<unsigned, EVT> getTypeLegalizationCost(LLVMContext &C, EVT Ty) const; + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + public: explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {} - + virtual ~VectorTargetTransformImpl() {} virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getBroadcastCost(Type *Tp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + + virtual unsigned getCFInstrCost(unsigned Opcode) const; + + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const; + + virtual unsigned getNumberOfParts(Type *Tp) const; }; } // end llvm namespace diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h index 96470c30ca..94db490443 100644 --- a/include/llvm/TargetTransformInfo.h +++ b/include/llvm/TargetTransformInfo.h @@ -45,11 +45,11 @@ public: /// used. TargetTransformInfo(); - explicit TargetTransformInfo(const ScalarTargetTransformInfo* S, - const VectorTargetTransformInfo *V) - : ImmutablePass(ID), STTI(S), VTTI(V) { - initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry()); - } + TargetTransformInfo(const ScalarTargetTransformInfo* S, + const VectorTargetTransformInfo *V) + : ImmutablePass(ID), STTI(S), VTTI(V) { + initializeTargetTransformInfoPass(*PassRegistry::getPassRegistry()); + } TargetTransformInfo(const TargetTransformInfo &T) : ImmutablePass(ID), STTI(T.STTI), VTTI(T.VTTI) { } @@ -102,7 +102,7 @@ public: /// isTruncateFree - Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. - virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const { + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } /// Is this type legal. @@ -117,6 +117,11 @@ public: virtual unsigned getJumpBufSize() const { return 0; } + /// shouldBuildLookupTables - Return true if switches should be turned into + /// lookup tables for the target. + virtual bool shouldBuildLookupTables() const { + return true; + } }; /// VectorTargetTransformInfo - This interface is used by the vectorizers @@ -143,19 +148,55 @@ public: return 1; } + /// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc. 
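// Sketch (hypothetical target code, not part of this commit): a backend
// would override the cost hooks below to report its real costs, e.g.:

#include "llvm/TargetTransformInfo.h"
#include "llvm/Instruction.h"
#include "llvm/Type.h"

namespace {
struct ToyVectorTTI : public llvm::VectorTargetTransformInfo {
  // Pretend vector selects cost twice the default on this toy target.
  virtual unsigned getCmpSelInstrCost(unsigned Opcode, llvm::Type *ValTy,
                                      llvm::Type *CondTy) const {
    (void)CondTy;
    return Opcode == llvm::Instruction::Select && ValTy->isVectorTy() ? 2 : 1;
  }
};
}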
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + return 1; + } + /// Returns the cost of a vector broadcast of a scalar at place zero to a /// vector of type 'Tp'. virtual unsigned getBroadcastCost(Type *Tp) const { return 1; } - /// Returns the cost of Load and Store instructions. + /// Returns the expected cost of cast instructions, such as bitcast, trunc, + /// zext, etc. + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return 1; + } + + /// Returns the expected cost of control-flow related instrutctions such as + /// Phi, Ret, Br. + virtual unsigned getCFInstrCost(unsigned Opcode) const { + return 1; + } + + /// Returns the expected cost of compare and select instructions. + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const { + return 1; + } + + /// Returns the expected cost of vector Insert and Extract. + /// Use -1 to indicate that there is no information on the index value. + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const { + return 1; + } + + /// Returns the cost of Load and Store instructions. virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { return 1; } + /// Returns the number of pieces into which the provided type must be + /// split during legalization. Zero is returned when the answer is unknown. + virtual unsigned getNumberOfParts(Type *Tp) const { + return 0; + } }; } // End llvm namespace diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 08d3bbd941..fc1cd59e4e 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -104,23 +104,14 @@ Pass *createPruneEHPass(); //===----------------------------------------------------------------------===// /// createInternalizePass - This pass loops over all of the functions in the -/// input module, internalizing all globals (functions and variables) not part -/// of the api. If a list of symbols is specified with the -/// -internalize-public-api-* command line options, those symbols are not -/// internalized and all others are. Otherwise if AllButMain is set and the -/// main function is found, all other globals are marked as internal. If no api -/// is supplied and AllButMain is not set, or no main function is found, nothing -/// is internalized. -/// -ModulePass *createInternalizePass(bool AllButMain); - -/// createInternalizePass - This pass loops over all of the functions in the /// input module, internalizing all globals (functions and variables) not in the /// given exportList. /// /// Note that commandline options that are used with the above function are not -/// used now! Also, when exportList is empty, nothing is internalized. +/// used now! ModulePass *createInternalizePass(const std::vector<const char *> &exportList); +/// createInternalizePass - Same as above, but with an empty exportList. 
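// Sketch: the two createInternalizePass entry points after this change (the
// old bool flavor is gone); hypothetical helper:

#include "llvm/Transforms/IPO.h"
#include <vector>

static llvm::ModulePass *makeInternalize(bool KeepMain) {
  if (KeepMain) {
    std::vector<const char *> Exports;
    Exports.push_back("main");        // keep main externally visible
    return llvm::createInternalizePass(Exports);
  }
  return llvm::createInternalizePass(); // empty export list
}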
+ModulePass *createInternalizePass(); //===----------------------------------------------------------------------===// /// createDeadArgEliminationPass - This pass removes arguments from functions diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 47ce90265b..3ea0a42720 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -104,6 +104,7 @@ public: bool DisableUnitAtATime; bool DisableUnrollLoops; bool Vectorize; + bool LoopVectorize; private: /// ExtensionList - This is list of all of the extensions that are registered. diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 49eeb57622..be3029e545 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -37,6 +37,7 @@ class AllocaInst; class ConstantExpr; class DataLayout; class TargetLibraryInfo; +class TargetTransformInfo; class DIBuilder; template<typename T> class SmallVectorImpl; @@ -134,7 +135,8 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB); /// of the CFG. It returns true if a modification was made, possibly deleting /// the basic block that was pointed to. /// -bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0); +bool SimplifyCFG(BasicBlock *BB, const DataLayout *TD = 0, + const TargetTransformInfo *TTI = 0); /// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, /// and if a predecessor branches to us and one of our successors, fold the @@ -177,9 +179,8 @@ static inline unsigned getKnownAlignment(Value *V, const DataLayout *TD = 0) { template<typename IRBuilderTy> Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP, bool NoAssumptions = false) { - unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace(); gep_type_iterator GTI = gep_type_begin(GEP); - Type *IntPtrTy = TD.getIntPtrType(GEP->getContext(), AS); + Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will @@ -187,7 +188,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP, bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoAssumptions; // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD.getPointerSizeInBits(); uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; diff --git a/include/llvm/Type.h b/include/llvm/Type.h index 5a867045af..def45750dd 100644 --- a/include/llvm/Type.h +++ b/include/llvm/Type.h @@ -153,7 +153,7 @@ public: /// isPPC_FP128Ty - Return true if this is powerpc long double. bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; } - /// isFloatingPointTy - Return true if this is one of the five floating point + /// isFloatingPointTy - Return true if this is one of the six floating point /// types bool isFloatingPointTy() const { return getTypeID() == HalfTyID || getTypeID() == FloatTyID || @@ -167,7 +167,7 @@ public: /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP. /// - bool isFPOrFPVectorTy() const; + bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } /// isLabelTy - Return true if this is 'label'. 
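With getScalarType() forwarding, the predicates treat a scalar and a vector of that scalar uniformly: both float and <4 x float> satisfy isFPOrFPVectorTy(). A small illustrative helper (a sketch; isWideFPVector is an invented name):

#include "llvm/Type.h"

// Both float and <4 x float> pass isFPOrFPVectorTy(); the extra
// isVectorTy() check then singles out the vector case.
static bool isWideFPVector(llvm::Type *Ty) {
  return Ty->isFPOrFPVectorTy() && Ty->isVectorTy() &&
         Ty->getVectorNumElements() >= 4;
}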
bool isLabelTy() const { return getTypeID() == LabelTyID; } @@ -185,7 +185,7 @@ public: /// isIntOrIntVectorTy - Return true if this is an integer type or a vector of /// integer types. /// - bool isIntOrIntVectorTy() const; + bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } /// isFunctionTy - True if this is an instance of FunctionType. /// @@ -203,6 +203,11 @@ public: /// bool isPointerTy() const { return getTypeID() == PointerTyID; } + /// isPtrOrPtrVectorTy - Return true if this is a pointer type or a vector of + /// pointer types. + /// + bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); } + /// isVectorTy - True if this is an instance of VectorType. /// bool isVectorTy() const { return getTypeID() == VectorTyID; } @@ -293,6 +298,7 @@ public: /// getScalarType - If this is a vector type, return the element type, /// otherwise return 'this'. + const Type *getScalarType() const; Type *getScalarType(); //===--------------------------------------------------------------------===// @@ -340,8 +346,10 @@ public: unsigned getVectorNumElements() const; Type *getVectorElementType() const { return getSequentialElementType(); } - unsigned getPointerAddressSpace() const; Type *getPointerElementType() const { return getSequentialElementType(); } + + /// \brief Get the address space of this pointer or pointer vector type. + unsigned getPointerAddressSpace() const; //===--------------------------------------------------------------------===// // Static members exported by the Type class itself. Useful for getting diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 588206e915..9dc81a6a63 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -26,6 +26,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeBasicAliasAnalysisPass(Registry); initializeBlockFrequencyInfoPass(Registry); initializeBranchProbabilityInfoPass(Registry); + initializeCostModelAnalysisPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); @@ -47,7 +48,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoPass(Registry); initializeLibCallAliasAnalysisPass(Registry); initializeLintPass(Registry); - initializeLoopDependenceAnalysisPass(Registry); initializeLoopInfoPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemoryDependenceAnalysisPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 36903f94e2..4bb93ee88a 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -58,12 +58,12 @@ static bool isNonEscapingLocalObject(const Value *V) { // then it has not escaped before entering the function. Check if it escapes // inside the function. if (const Argument *A = dyn_cast<Argument>(V)) - if (A->hasByValAttr() || A->hasNoAliasAttr()) { - // Don't bother analyzing arguments already known not to escape. - if (A->hasNoCaptureAttr()) - return true; + if (A->hasByValAttr() || A->hasNoAliasAttr()) + // Note even if the argument is marked nocapture we still need to check + // for copies made inside the function. The nocapture attribute only + // specifies that there are no copies made that outlive the function. 
return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); - } + return false; } @@ -286,8 +286,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, V = GEPOp->getOperand(0); continue; } - - unsigned AS = GEPOp->getPointerAddressSpace(); + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. gep_type_iterator GTI = gep_type_begin(GEPOp); for (User::const_op_iterator I = GEPOp->op_begin()+1, @@ -316,7 +315,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If the integer type is smaller than the pointer size, it is implicitly // sign extended to pointer size. unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth(); - if (TD->getPointerSizeInBits(AS) > Width) + if (TD->getPointerSizeInBits() > Width) Extension = EK_SignExt; // Use GetLinearExpression to decompose the index into a C1*V+C2 form. @@ -345,7 +344,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // Make sure that we have a scale that makes sense for this target's // pointer size. - if (unsigned ShiftBits = 64-TD->getPointerSizeInBits(AS)) { + if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { Scale <<= ShiftBits; Scale = (int64_t)Scale >> ShiftBits; } @@ -1246,6 +1245,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(V1, V2); std::swap(V1Size, V2Size); std::swap(O1, O2); + std::swap(V1TBAAInfo, V2TBAAInfo); } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2); @@ -1255,6 +1255,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); + std::swap(V1TBAAInfo, V2TBAAInfo); } if (const PHINode *PN = dyn_cast<PHINode>(V1)) { AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, @@ -1265,6 +1266,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); + std::swap(V1TBAAInfo, V2TBAAInfo); } if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 3ce888fefa..b3a40bee42 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp + CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp DbgInfoPrinter.cpp @@ -27,7 +28,6 @@ add_llvm_library(LLVMAnalysis LibCallSemantics.cpp Lint.cpp Loads.cpp - LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp MemDepPrinter.cpp diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index d669268496..651a54be1b 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -91,16 +91,14 @@ bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { // which doesn't contain values outside the range of a pointer. if (isa<IntToPtrInst>(CI) && TD && TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits( - cast<IntToPtrInst>(CI)->getAddressSpace())) + Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) return true; // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. 
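The inttoptr rule above and the mirrored ptrtoint test that follows both reduce to a width comparison against the pointer size; restated as a standalone predicate (a sketch; isFreeCastWidth is an invented helper):

#include "llvm/DataLayout.h"

// inttoptr is free when the integer is a legal type no wider than a
// pointer; ptrtoint is free when it is a legal type at least as wide.
static bool isFreeCastWidth(const llvm::DataLayout *TD, unsigned Bits,
                            bool IsPtrToInt) {
  if (!TD || !TD->isLegalInteger(Bits))
    return false;
  return IsPtrToInt ? Bits >= TD->getPointerSizeInBits()
                    : Bits <= TD->getPointerSizeInBits();
}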
if (isa<PtrToIntInst>(CI) && TD && TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits( - cast<PtrToIntInst>(CI)->getPointerAddressSpace())) + Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) return true; // trunc to a native type is free (assuming the target has compare and diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index de6d61d78b..5cac8ca3ba 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -170,15 +170,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++)); if (!Src) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); - + // Zero extend the element to the right size. Src = ConstantExpr::getZExt(Src, Elt->getType()); - + // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; - + // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); } @@ -378,8 +378,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) - return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, TD); } @@ -575,7 +575,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) return 0; - Type *IntPtrTy = TD->getIntPtrType(ResultTy); + Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); bool Any = false; SmallVector<Constant*, 32> NewIdxs; @@ -629,8 +629,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, !Ptr->getType()->isPointerTy()) return 0; - unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace(); - Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext(), AS); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -703,8 +702,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Also, this helps GlobalOpt do SROA on GlobalVariables. Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); - assert(Ty->getPointerAddressSpace() == AS - && "Operand and result of GEP should be in the same address space."); SmallVector<Constant*, 32> NewIdxs; do { if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { @@ -720,7 +717,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext(), AS); + IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) // The element size is 0. 
This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -893,7 +890,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, ArrayRef<Constant *> Ops, const DataLayout *TD, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) @@ -919,11 +916,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, if (TD && CE->getOpcode() == Instruction::IntToPtr) { Constant *Input = CE->getOperand(0); unsigned InWidth = Input->getType()->getScalarSizeInBits(); - unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace(); - if (TD->getPointerSizeInBits(AS) < InWidth) { + if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, - TD->getPointerSizeInBits(AS))); + TD->getPointerSizeInBits())); Input = ConstantExpr::getAnd(Input, Mask); } // Do a zext or trunc to get to the dest size. @@ -936,9 +932,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, // the int size is >= the ptr size. This requires knowing the width of a // pointer, so it can't be done in ConstantExpr::getCast. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) - if (TD && CE->getOpcode() == Instruction::PtrToInt && - TD->getTypeSizeInBits(CE->getOperand(0)->getType()) - <= CE->getType()->getScalarSizeInBits()) + if (TD && + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) return FoldBitCast(CE->getOperand(0), DestTy, *TD); return ConstantExpr::getCast(Opcode, Ops[0], DestTy); @@ -990,10 +986,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // ConstantExpr::getCompare cannot do this, because it doesn't have TD // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { - Type *IntPtrTy = NULL; if (TD && Ops1->isNullValue()) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { - IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1004,21 +999,19 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. 
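The transformation this comment guards (the rewritten code continues below) hinges on an exact intptr-width match, so that the cast neither truncates nor extends. In isolation the guard looks like this (a sketch; canFoldPtrToIntCompare is an invented name):

#include "llvm/Constants.h"
#include "llvm/DataLayout.h"
#include "llvm/Instruction.h"

// Fold icmp (ptrtoint P), 0 down to icmp P, null only when the integer
// type is exactly intptr-sized for this target.
static bool canFoldPtrToIntCompare(const llvm::ConstantExpr *CE0,
                                   const llvm::DataLayout *TD) {
  return TD && CE0->getOpcode() == llvm::Instruction::PtrToInt &&
         CE0->getType() == TD->getIntPtrType(CE0->getContext());
}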
- if (CE0->getOpcode() == Instruction::PtrToInt) { - IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); - if (CE0->getType() == IntPtrTy) { - Constant *C = CE0->getOperand(0); - Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); - } + if (CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); } } if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1027,16 +1020,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, IntPtrTy, false); return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI); } - } - // Only do this transformation if the int is intptrty in size, otherwise - // there is a truncation or extension that we aren't modeling. - if (CE0->getOpcode() == Instruction::PtrToInt) { - IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); - if (CE0->getType() == IntPtrTy && - CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if ((CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD, TLI); + CE1->getOperand(0), TD, TLI); } } diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp new file mode 100644 index 0000000000..5adbf45810 --- /dev/null +++ b/lib/Analysis/CostModel.cpp @@ -0,0 +1,193 @@ +//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the cost model analysis. It provides a very basic cost +// estimation for LLVM-IR. The cost result can be thought of as cycles, but it +// is really unit-less. The estimated cost is meant to be used for comparing +// alternatives. +// +//===----------------------------------------------------------------------===// + +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME +#include "llvm/Analysis/Passes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/TargetTransformInfo.h" +#include "llvm/Value.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class CostModelAnalysis : public FunctionPass { + + public: + static char ID; // Class identification, replacement for typeinfo + CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + initializeCostModelAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// Returns the expected cost of the instruction. + /// Returns -1 if the cost is unknown.
+ /// Note, this method does not cache the cost calculation and it + /// can be expensive in some cases. + unsigned getInstructionCost(Instruction *I) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual void print(raw_ostream &OS, const Module*) const; + + /// The function that we analyze. + Function *F; + /// Vector target information. + const VectorTargetTransformInfo *VTTI; + }; +} // End of anonymous namespace + +// Register this pass. +char CostModelAnalysis::ID = 0; +static const char cm_name[] = "Cost Model Analysis"; +INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) +INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) + +FunctionPass *llvm::createCostModelAnalysisPass() { + return new CostModelAnalysis(); +} + +void +CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool +CostModelAnalysis::runOnFunction(Function &F) { + this->F = &F; + + // Target information. + TargetTransformInfo *TTI; + TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + if (TTI) + VTTI = TTI->getVectorTargetTransformInfo(); + + return false; +} + +unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { + if (!VTTI) + return -1; + + switch (I->getOpcode()) { + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return VTTI->getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + } + case Instruction::Select: { + SelectInst *SI = cast<SelectInst>(I); + Type *CondTy = SI->getCondition()->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + } + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace()); + } + case Instruction::Load: { + LoadInst *LI = cast<LoadInst>(I); + return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace()); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + } + case Instruction::ExtractElement: { + ExtractElementInst * EEI = cast<ExtractElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return VTTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case 
Instruction::InsertElement: { + InsertElementInst * IE = cast<InsertElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return VTTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + default: + // We don't have any information on this instruction. + return -1; + } +} + +void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { + if (!F) + return; + + for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { + for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { + Instruction *Inst = it; + unsigned Cost = getInstructionCost(Inst); + if (Cost != (unsigned)-1) + OS << "Cost Model: Found an estimated cost of " << Cost; + else + OS << "Cost Model: Unknown cost"; + + OS << " for instruction: "<< *Inst << "\n"; + } + } +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index f97f0f2de6..95ac5ea233 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -1773,7 +1773,7 @@ bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, // where i and j are induction variable, c1 and c2 are loop invariant, // and a and b are constants. // Returns true if any possible dependence is disproved. -// Marks the result as inconsistant. +// Marks the result as inconsistent. // Works in some cases that symbolicRDIVtest doesn't, and vice versa. bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, @@ -2202,7 +2202,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { // gcdMIVtest - // Tests an MIV subscript pair for dependence. // Returns true if any possible dependence is disproved. -// Marks the result as inconsistant. +// Marks the result as inconsistent. // Can sometimes disprove the equal direction for 1 or more loops, // as discussed in Michael Wolfe's book, // High Performance Compilers for Parallel Computing, page 235. @@ -2278,11 +2278,12 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, assert(!Constant && "Surprised to find multiple constants"); Constant = cast<SCEVConstant>(Operand); } - else if (isa<SCEVMulExpr>(Operand)) { + else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { // Search for constant operand to participate in GCD; // If none found; return false. - const SCEVConstant *ConstOp = - getConstantPart(cast<SCEVMulExpr>(Operand)); + const SCEVConstant *ConstOp = getConstantPart(Product); + if (!ConstOp) + return false; APInt ConstOpValue = ConstOp->getValue()->getValue(); ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOpValue.abs()); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 64e183d60c..5f51f775f1 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -243,8 +243,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { if (!TD) return false; - unsigned AS = GEP.getPointerAddressSpace(); - unsigned IntPtrWidth = TD->getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD->getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -392,8 +391,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. 
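The new cost-model pass above is consumed like any other analysis: a client acquires the vector cost interface exactly as CostModelAnalysis::runOnFunction does. A sketch (CostQueryPass is hypothetical, and pass registration is omitted):

#include "llvm/Function.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
#include "llvm/TargetTransformInfo.h"
#include "llvm/Type.h"

struct CostQueryPass : public llvm::FunctionPass {
  static char ID;
  CostQueryPass() : llvm::FunctionPass(ID) {}
  virtual bool runOnFunction(llvm::Function &F) {
    // TTI is only present when a target registers it, hence the checks.
    const llvm::VectorTargetTransformInfo *VTTI = 0;
    if (llvm::TargetTransformInfo *TTI =
            getAnalysisIfAvailable<llvm::TargetTransformInfo>())
      VTTI = TTI->getVectorTargetTransformInfo();
    if (VTTI) {
      // Cost of a scalar i32 add: 1 from the defaults unless a target
      // overrides getArithmeticInstrCost.
      unsigned AddCost = VTTI->getArithmeticInstrCost(
          llvm::Instruction::Add, llvm::Type::getInt32Ty(F.getContext()));
      (void)AddCost;
    }
    return false;
  }
};
char CostQueryPass::ID = 0;

Since the pass also implements print(), it can presumably be inspected with opt -cost-model -analyze (the flag name comes from CM_NAME above), which emits the "Cost Model: Found an estimated cost of ..." lines assembled at the end of the file.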
unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - unsigned AS = I.getPointerAddressSpace(); - if (TD && IntegerSize >= TD->getPointerSizeInBits(AS)) { + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -427,8 +425,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - unsigned AS = I.getAddressSpace(); - if (TD && IntegerSize <= TD->getPointerSizeInBits(AS)) { + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -763,8 +760,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!TD || !V->getType()->isPointerTy()) return 0; - unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();; - unsigned IntPtrWidth = TD->getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -788,7 +784,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD->getIntPtrType(V->getType()); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); } @@ -828,7 +824,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getTypeSizeInBits(PTy); + unsigned PointerSize = TD->getPointerSizeInBits(); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 7ef74f67ce..b3d62487fc 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -666,8 +666,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// 'Offset' APInt must be the bitwidth of the target's pointer size. static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, APInt &Offset) { - unsigned AS = GEP->getPointerAddressSpace(); - unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD.getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); gep_type_iterator GTI = gep_type_begin(GEP); @@ -697,14 +696,12 @@ static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. -/// FIXME: This function also exists in InlineCost.cpp. 
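Concrete numbers make the byval store estimate in analyzeCall above easier to check (a sketch assuming a hypothetical target with 64-bit pointers and a 96-bit byval struct):

// A 96-bit struct passed byval costs two pointer-sized stores:
// NumStores = (96 + 64 - 1) / 64 = 159 / 64 = 2
unsigned TypeSize = 96, PointerSize = 64;
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // == 2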
static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Value *&V) { if (!V->getType()->isPointerTy()) return 0; - unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();; - unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD.getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -728,7 +725,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext(), AS); + Type *IntPtrTy = TD.getIntPtrType(V->getContext()); return ConstantInt::get(IntPtrTy, Offset); } @@ -1880,7 +1877,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && - Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { + Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 751118a86e..2b87d80d37 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" @@ -294,7 +295,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { //===----------------------------------------------------------------------===// namespace { - /// LVIValueHandle - A callback value handle update the cache when + /// LVIValueHandle - A callback value handle updates the cache when /// values are erased. 
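In the LazyValueInfo hunk below, the block scan is only attempted once GetUnderlyingObject has reached a fixed point. The guard in isolation (a sketch; underlyingObjectConverged is an invented name):

#include "llvm/Analysis/ValueTracking.h"

static bool underlyingObjectConverged(llvm::Value *Val) {
  llvm::Value *UnderlyingVal = llvm::GetUnderlyingObject(Val);
  // One extra stripping step (MaxLookup = 1): if nothing changes, the
  // value is already a fixed point of GetUnderlyingObject.
  return UnderlyingVal ==
         llvm::GetUnderlyingObject(UnderlyingVal, /*TD=*/0, /*MaxLookup=*/1);
}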
class LazyValueInfoCache; struct LVIValueHandle : public CallbackVH { @@ -557,13 +558,11 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (LoadInst *L = dyn_cast<LoadInst>(I)) { return L->getPointerAddressSpace() == 0 && - GetUnderlyingObject(L->getPointerOperand()) == - GetUnderlyingObject(Ptr); + GetUnderlyingObject(L->getPointerOperand()) == Ptr; } if (StoreInst *S = dyn_cast<StoreInst>(I)) { return S->getPointerAddressSpace() == 0 && - GetUnderlyingObject(S->getPointerOperand()) == - GetUnderlyingObject(Ptr); + GetUnderlyingObject(S->getPointerOperand()) == Ptr; } if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (MI->isVolatile()) return false; @@ -573,11 +572,11 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { if (!Len || Len->isZero()) return false; if (MI->getDestAddressSpace() == 0) - if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + if (GetUnderlyingObject(MI->getRawDest()) == Ptr) return true; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) if (MTI->getSourceAddressSpace() == 0) - if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr) + if (GetUnderlyingObject(MTI->getRawSource()) == Ptr) return true; } return false; @@ -591,13 +590,19 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, // then we know that the pointer can't be NULL. bool NotNull = false; if (Val->getType()->isPointerTy()) { - if (isa<AllocaInst>(Val)) { + if (isKnownNonNull(Val)) { NotNull = true; } else { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ - if (InstructionDereferencesPointer(BI, Val)) { - NotNull = true; - break; + Value *UnderlyingVal = GetUnderlyingObject(Val); + // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge + // inside InstructionDereferencesPointer either. + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + if (InstructionDereferencesPointer(BI, UnderlyingVal)) { + NotNull = true; + break; + } } } } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index d62808e9cd..6d6d580ed1 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -626,7 +626,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (W != V) return findValueImpl(W, OffsetOk, Visited); } else if (CastInst *CI = dyn_cast<CastInst>(V)) { - if (CI->isNoopCast(*TD)) + if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) return findValueImpl(CI->getOperand(0), OffsetOk, Visited); } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), @@ -639,7 +640,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - TD ? TD->getIntPtrType(CE->getType()) : + TD ? 
TD->getIntPtrType(V->getContext()) : Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp deleted file mode 100644 index b696e5fae1..0000000000 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ /dev/null @@ -1,362 +0,0 @@ -//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is the (beginning) of an implementation of a loop dependence analysis -// framework, which is used to detect dependences in memory accesses in loops. -// -// Please note that this is work in progress and the interface is subject to -// change. -// -// TODO: adapt as implementation progresses. -// -// TODO: document lingo (pair, subscript, index) -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "lda" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/LoopDependenceAnalysis.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Instructions.h" -#include "llvm/Operator.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" -using namespace llvm; - -STATISTIC(NumAnswered, "Number of dependence queries answered"); -STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); -STATISTIC(NumDependent, "Number of pairs with dependent accesses"); -STATISTIC(NumIndependent, "Number of pairs with independent accesses"); -STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); - -LoopPass *llvm::createLoopDependenceAnalysisPass() { - return new LoopDependenceAnalysis(); -} - -INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda", - "Loop Dependence Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda", - "Loop Dependence Analysis", false, true) -char LoopDependenceAnalysis::ID = 0; - -//===----------------------------------------------------------------------===// -// Utility Functions -//===----------------------------------------------------------------------===// - -static inline bool IsMemRefInstr(const Value *V) { - const Instruction *I = dyn_cast<const Instruction>(V); - return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); -} - -static void GetMemRefInstrs(const Loop *L, - SmallVectorImpl<Instruction*> &Memrefs) { - for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); - b != be; ++b) - for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); - i != ie; ++i) - if (IsMemRefInstr(i)) - Memrefs.push_back(i); -} - -static bool IsLoadOrStoreInst(Value *I) { - // Returns true if the load or store can be analyzed. Atomic and volatile - // operations have properties which this analysis does not understand. 
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->isUnordered(); - else if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->isUnordered(); - return false; -} - -static Value *GetPointerOperand(Value *I) { - if (LoadInst *i = dyn_cast<LoadInst>(I)) - return i->getPointerOperand(); - if (StoreInst *i = dyn_cast<StoreInst>(I)) - return i->getPointerOperand(); - llvm_unreachable("Value is no load or store instruction!"); -} - -static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, - const Value *A, - const Value *B) { - const Value *aObj = GetUnderlyingObject(A); - const Value *bObj = GetUnderlyingObject(B); - return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), - bObj, AA->getTypeStoreSize(bObj->getType())); -} - -static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { - return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); -} - -//===----------------------------------------------------------------------===// -// Dependence Testing -//===----------------------------------------------------------------------===// - -bool LoopDependenceAnalysis::isDependencePair(const Value *A, - const Value *B) const { - return IsMemRefInstr(A) && - IsMemRefInstr(B) && - (cast<const Instruction>(A)->mayWriteToMemory() || - cast<const Instruction>(B)->mayWriteToMemory()); -} - -bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, - Value *B, - DependencePair *&P) { - void *insertPos = 0; - FoldingSetNodeID id; - id.AddPointer(A); - id.AddPointer(B); - - P = Pairs.FindNodeOrInsertPos(id, insertPos); - if (P) return true; - - P = new (PairAllocator) DependencePair(id, A, B); - Pairs.InsertNode(P, insertPos); - return false; -} - -void LoopDependenceAnalysis::getLoops(const SCEV *S, - DenseSet<const Loop*>* Loops) const { - // Refactor this into an SCEVVisitor, if efficiency becomes a concern. - for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) - if (!SE->isLoopInvariant(S, L)) - Loops->insert(L); -} - -bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { - DenseSet<const Loop*> loops; - getLoops(S, &loops); - return loops.empty(); -} - -bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { - const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S); - return isLoopInvariant(S) || (rec && rec->isAffine()); -} - -bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { - return isLoopInvariant(A) && isLoopInvariant(B); -} - -bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { - DenseSet<const Loop*> loops; - getLoops(A, &loops); - getLoops(B, &loops); - return loops.size() == 1; -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseZIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); - return A == B ? Dependent : Independent; -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseSIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - return Unknown; // TODO: Implement. -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseMIV(const SCEV *A, - const SCEV *B, - Subscript *S) const { - return Unknown; // TODO: Implement. 
-} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analyseSubscript(const SCEV *A, - const SCEV *B, - Subscript *S) const { - DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n"); - - if (A == B) { - DEBUG(dbgs() << " -> [D] same SCEV\n"); - return Dependent; - } - - if (!isAffine(A) || !isAffine(B)) { - DEBUG(dbgs() << " -> [?] not affine\n"); - return Unknown; - } - - if (isZIVPair(A, B)) - return analyseZIV(A, B, S); - - if (isSIVPair(A, B)) - return analyseSIV(A, B, S); - - return analyseMIV(A, B, S); -} - -LoopDependenceAnalysis::DependenceResult -LoopDependenceAnalysis::analysePair(DependencePair *P) const { - DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n"); - - // We only analyse loads and stores but no possible memory accesses by e.g. - // free, call, or invoke instructions. - if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) { - DEBUG(dbgs() << "--> [?] no load/store\n"); - return Unknown; - } - - Value *aPtr = GetPointerOperand(P->A); - Value *bPtr = GetPointerOperand(P->B); - - switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) { - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - // We can not analyse objects if we do not know about their aliasing. - DEBUG(dbgs() << "---> [?] may alias\n"); - return Unknown; - - case AliasAnalysis::NoAlias: - // If the objects noalias, they are distinct, accesses are independent. - DEBUG(dbgs() << "---> [I] no alias\n"); - return Independent; - - case AliasAnalysis::MustAlias: - break; // The underlying objects alias, test accesses for dependence. - } - - const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr); - const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr); - - if (!aGEP || !bGEP) - return Unknown; - - // FIXME: Is filtering coupled subscripts necessary? - - // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding - // trailing zeroes to the smaller GEP, if needed. - typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy; - GEPOpdPairsTy opds; - for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(), - aEnd = aGEP->idx_end(), - bIdx = bGEP->idx_begin(), - bEnd = bGEP->idx_end(); - aIdx != aEnd && bIdx != bEnd; - aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) { - const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE); - const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE); - opds.push_back(std::make_pair(aSCEV, bSCEV)); - } - - if (!opds.empty() && opds[0].first != opds[0].second) { - // We cannot (yet) handle arbitrary GEP pointer offsets. By limiting - // - // TODO: this could be relaxed by adding the size of the underlying object - // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we - // know that x is a [100 x i8]*, we could modify the first subscript to be - // (i, 200-i) instead of (i, -i). - return Unknown; - } - - // Now analyse the collected operand pairs (skipping the GEP ptr offsets). - for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end(); - i != end; ++i) { - Subscript subscript; - DependenceResult result = analyseSubscript(i->first, i->second, &subscript); - if (result != Dependent) { - // We either proved independence or failed to analyse this subscript. - // Further subscripts will not improve the situation, so abort early. - return result; - } - P->Subscripts.push_back(subscript); - } - // We successfully analysed all subscripts but failed to prove independence. 
- return Dependent; -} - -bool LoopDependenceAnalysis::depends(Value *A, Value *B) { - assert(isDependencePair(A, B) && "Values form no dependence pair!"); - ++NumAnswered; - - DependencePair *p; - if (!findOrInsertDependencePair(A, B, p)) { - // The pair is not cached, so analyse it. - ++NumAnalysed; - switch (p->Result = analysePair(p)) { - case Dependent: ++NumDependent; break; - case Independent: ++NumIndependent; break; - case Unknown: ++NumUnknown; break; - } - } - return p->Result != Independent; -} - -//===----------------------------------------------------------------------===// -// LoopDependenceAnalysis Implementation -//===----------------------------------------------------------------------===// - -bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { - this->L = L; - AA = &getAnalysis<AliasAnalysis>(); - SE = &getAnalysis<ScalarEvolution>(); - return false; -} - -void LoopDependenceAnalysis::releaseMemory() { - Pairs.clear(); - PairAllocator.Reset(); -} - -void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive<AliasAnalysis>(); - AU.addRequiredTransitive<ScalarEvolution>(); -} - -static void PrintLoopInfo(raw_ostream &OS, - LoopDependenceAnalysis *LDA, const Loop *L) { - if (!L->empty()) return; // ignore non-innermost loops - - SmallVector<Instruction*, 8> memrefs; - GetMemRefInstrs(L, memrefs); - - OS << "Loop at depth " << L->getLoopDepth() << ", header block: "; - WriteAsOperand(OS, L->getHeader(), false); - OS << "\n"; - - OS << " Load/store instructions: " << memrefs.size() << "\n"; - for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; - - OS << " Pairwise dependence results:\n"; - for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - for (SmallVector<Instruction*, 8>::const_iterator y = x + 1; - y != end; ++y) - if (LDA->isDependencePair(*x, *y)) - OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) - << ": " << (LDA->depends(*x, *y) ? 
"dependent" : "independent") - << "\n"; -} - -void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { - // TODO: doc why const_cast is safe - PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L); -} diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8d903c63af..0a539fe758 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -376,10 +376,9 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, const TargetLibraryInfo *TLI, LLVMContext &Context, - bool RoundToAlign, - unsigned AS) + bool RoundToAlign) : TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = TD->getIntPtrType(Context, AS); + IntegerType *IntTy = TD->getIntPtrType(Context); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); } @@ -562,10 +561,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD, const TargetLibraryInfo *TLI, - LLVMContext &Context, - unsigned AS) + LLVMContext &Context) : TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { - IntTy = TD->getIntPtrType(Context, AS); + IntTy = TD->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp index 69286efb3c..a4f634af53 100644 --- a/lib/Analysis/ProfileDataLoader.cpp +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -51,13 +51,7 @@ static unsigned AddCounts(unsigned A, unsigned B) { if (A == ProfileDataLoader::Uncounted) return B; if (B == ProfileDataLoader::Uncounted) return A; - // Saturate to the maximum storable value. This could change taken/nottaken - // ratios, but is presumably better than wrapping and thus potentially - // inverting ratios. - uint64_t tmp = (uint64_t)A + (uint64_t)B; - if (tmp > (uint64_t)ProfileDataLoader::MaxCount) - tmp = ProfileDataLoader::MaxCount; - return (unsigned)tmp; + return A + B; } /// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' @@ -120,7 +114,6 @@ static void ReadProfilingArgBlock(const char *ToolName, FILE *F, } const unsigned ProfileDataLoader::Uncounted = ~0U; -const unsigned ProfileDataLoader::MaxCount = ~0U - 1U; /// ProfileDataLoader ctor - Read the specified profiling data file, reporting /// a fatal error if the file is invalid or broken. diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 148912b766..5f60bd1674 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -105,6 +105,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, "derived loop"), cl::init(100)); +// FIXME: Enable this with XDEBUG when the test suite is clean. 
+static cl::opt<bool> +VerifySCEV("verify-scev", + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); + INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -2581,12 +2586,13 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy, Type *IntPtrTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(IntPtrTy, TD->getTypeAllocSize(AllocTy)); + return getConstant(TD->getIntPtrType(getContext()), + TD->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) @@ -2605,13 +2611,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Type *IntPtrTy, +const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, unsigned FieldNo) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(IntPtrTy, + return getConstant(TD->getIntPtrType(getContext()), TD->getStructLayout(STy)->getElementOffset(FieldNo)); Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); @@ -2698,7 +2704,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) return TD->getIntPtrType(Ty); + if (TD) return TD->getIntPtrType(getContext()); // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); @@ -3151,13 +3157,13 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, IntPtrTy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(*GTI, IntPtrTy); + const SCEV *ElementSize = getSizeOfExpr(*GTI); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); @@ -6934,3 +6940,87 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { UnsignedRanges.erase(S); SignedRanges.erase(S); } + +typedef DenseMap<const Loop *, std::string> VerifyMap; + +/// replaceSubString - Replaces all occurrences of From in Str with To. +static void replaceSubString(std::string &Str, StringRef From, StringRef To) { + size_t Pos = 0; + while ((Pos = Str.find(From, Pos)) != std::string::npos) { + Str.replace(Pos, From.size(), To.data(), To.size()); + Pos += To.size(); + } +} + +/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
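Because the verification in the helper that follows compares printed strings rather than SCEV nodes, this canonicalization matters; for example (a sketch with a made-up count):

std::string S = "(-1 + %n)<nsw>";   // a printed backedge-taken count
replaceSubString(S, "<nsw>", "");   // wrap flags are deliberately dropped
// S == "(-1 + %n)", comparable across the cached and uncached runs.

The entire check is gated on the -verify-scev flag declared at the top of this hunk, so it costs nothing unless explicitly requested.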
+static void +getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { + for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) { + getLoopBackedgeTakenCounts(*I, Map, SE); // recurse. + + std::string &S = Map[L]; + if (S.empty()) { + raw_string_ostream OS(S); + SE.getBackedgeTakenCount(L)->print(OS); + + // false and 0 are semantically equivalent. This can happen in dead loops. + replaceSubString(OS.str(), "false", "0"); + // Remove wrap flags, their use in SCEV is highly fragile. + // FIXME: Remove this when SCEV gets smarter about them. + replaceSubString(OS.str(), "<nw>", ""); + replaceSubString(OS.str(), "<nsw>", ""); + replaceSubString(OS.str(), "<nuw>", ""); + } + } +} + +void ScalarEvolution::verifyAnalysis() const { + if (!VerifySCEV) + return; + + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + + // Gather stringified backedge taken counts for all loops using SCEV's caches. + // FIXME: It would be much better to store actual values instead of strings, + // but SCEV pointers will change if we drop the caches. + VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + + // Gather stringified backedge taken counts for all loops without using + // SCEV's caches. + SE.releaseMemory(); + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); + + // Now compare whether they're the same with and without caches. This allows + // verifying that no pass changed the cache. + assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && + "New loops suddenly appeared!"); + + for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), + OldE = BackedgeDumpsOld.end(), + NewI = BackedgeDumpsNew.begin(); + OldI != OldE; ++OldI, ++NewI) { + assert(OldI->first == NewI->first && "Loop order changed!"); + + // Compare the stringified SCEVs. We don't care if undef backedge-taken count + // changes. + // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This + // means that a pass is buggy or SCEV has to learn a new pattern but is + // usually not harmful. + if (OldI->second != NewI->second && + OldI->second.find("undef") == std::string::npos && + NewI->second.find("undef") == std::string::npos && + OldI->second != "***COULDNOTCOMPUTE***" && + NewI->second != "***COULDNOTCOMPUTE***") { + dbgs() << "SCEVValidator: SCEV for loop '" + << OldI->first->getHeader()->getName() + << "' changed from '" << OldI->second + << "' to '" << NewI->second << "'!\n"; + std::abort(); + } + } + + // TODO: Verify more things. +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0295da5e4a..111bfb4a6a 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -417,9 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector<const SCEV *, 8> ScaledOps; if (ElTy->isSized()) { - Type *IntPtrTy = SE.TD ?
SE.TD->getIntPtrType(PTy) : - IntegerType::getInt64Ty(PTy->getContext()); - const SCEV *ElSize = SE.getSizeOfExpr(ElTy, IntPtrTy); + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 1d7f0692cb..3beb373dc5 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -40,8 +40,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; assert(isa<PointerType>(Ty) && "Expected a pointer type!"); - return TD ? - TD->getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()) : 0; + return TD ? TD->getPointerSizeInBits() : 0; } static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, @@ -430,15 +429,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::ZExt: case Instruction::Trunc: { Type *SrcTy = I->getOperand(0)->getType(); - + unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - if (SrcTy->isPointerTy()) - SrcBitWidth = TD->getTypeSizeInBits(SrcTy); - else - SrcBitWidth = SrcTy->getScalarSizeInBits(); - + SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + + assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); @@ -1622,8 +1619,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, // Re-sign extend from the pointer size if needed to get overflow edge cases // right. 
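// The hunks in this region consistently revert DataLayout's address-space-
// aware queries to their default-address-space forms. A minimal sketch of
// the two calls involved, assuming a hypothetical 32-bit layout string
// (illustrative only, not part of the patch):
//
//   DataLayout TD("e-p:32:32:32");                // 32-bit pointers
//   unsigned PtrSize = TD.getPointerSizeInBits(); // 32; no AS argument
//   int64_t Offset = 0xFFFFFFF0LL;                // -16 when read as 32 bits
//   if (PtrSize < 64)
//     Offset = SignExtend64(Offset, PtrSize);     // Offset == -16 again
//
// The change below drops the explicit address-space argument accordingly: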
- unsigned AS = GEP->getPointerAddressSpace(); - unsigned PtrSize = TD.getPointerSizeInBits(AS); + unsigned PtrSize = TD.getPointerSizeInBits(); if (PtrSize < 64) Offset = SignExtend64(Offset, PtrSize); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 91f973d8d3..a60e4aa41c 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -558,7 +558,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); - KEYWORD(forcesizeopt); + KEYWORD(minsize); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 75fc16cd95..ac803c5783 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -953,7 +953,7 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { case lltok::kw_naked: B.addAttribute(Attributes::Naked); break; case lltok::kw_nonlazybind: B.addAttribute(Attributes::NonLazyBind); break; case lltok::kw_address_safety: B.addAttribute(Attributes::AddressSafety); break; - case lltok::kw_forcesizeopt: B.addAttribute(Attributes::ForceSizeOpt); break; + case lltok::kw_minsize: B.addAttribute(Attributes::MinSize); break; case lltok::kw_alignstack: { unsigned Alignment; @@ -1012,7 +1012,7 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { case lltok::kw_nonlazybind: case lltok::kw_returns_twice: case lltok::kw_address_safety: - case lltok::kw_forcesizeopt: + case lltok::kw_minsize: if (AttrKind != 2) HaveError |= Error(AttrLoc, "invalid use of function-only attribute"); break; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6cffc52d17..036686d318 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -110,7 +110,7 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, - kw_forcesizeopt, + kw_minsize, kw_type, kw_opaque, diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 81eec3c9ac..b4f0b174b5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -401,8 +401,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - assert(GV->getType()->isPointerTy() && "GV must be a pointer type!"); - unsigned PtrSize = TD->getTypeSizeInBits(GV->getType())/8; + unsigned PtrSize = TD->getPointerSizeInBits()/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); @@ -1357,7 +1356,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { // Emit the function pointers in the target-specific order const DataLayout *TD = TM.getDataLayout(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment(0)); + unsigned Align = Log2_32(TD->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), priority_order); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { const MCSection *OutputSection = @@ -1538,9 +1537,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { if (Offset == 0) return Base; - assert(CE->getType()->isPointerTy() && "We must have a pointer type!"); // Truncate/sext the offset to the pointer size. 
- unsigned Width = TD.getTypeSizeInBits(CE->getType()); + unsigned Width = TD.getPointerSizeInBits(); if (Width < 64) Offset = SignExtend64(Offset, Width); @@ -1562,7 +1560,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()), + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), false/*ZExt*/); return lowerConstant(Op, AP); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 6c17af2e8c..d94e1fe61b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { switch (Encoding & 0x07) { default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(0); + case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: return 4; case dwarf::DW_EH_PE_udata8: return 8; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 73e18cd817..4d73b3c222 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -200,7 +200,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: - Size = Asm->getDataLayout().getPointerSize(0); break; + Size = Asm->getDataLayout().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -222,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); - case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(0); + case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -249,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(0); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG @@ -273,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getDataLayout().getPointerSize(0); + return AP->getDataLayout().getPointerSize(); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 466dc69da2..64d6186d91 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -384,7 +384,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. 
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize(0)); + * Asm->getDataLayout().getPointerSize()); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -424,7 +424,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DISubprogram InlinedSP = getDISubprogram(DS); DIE *OriginDIE = TheCU->getDIE(InlinedSP); if (!OriginDIE) { - DEBUG(dbgs() << "Unable to find original DIE for inlined subprogram."); + DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram."); return NULL; } @@ -433,7 +433,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for a inlined scope!"); + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); } assert(StartLabel->isDefined() && "Invalid starting label for an inlined scope!"); @@ -450,7 +450,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize(0)); + * Asm->getDataLayout().getPointerSize()); for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -878,9 +878,9 @@ void DwarfDebug::endModule() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); // End text sections. - for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->OutStreamer.SwitchSection(SectionMap[i]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i)); + for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { + Asm->OutStreamer.SwitchSection(SectionMap[I]); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); } // Compute DIE offsets and sizes. @@ -1793,7 +1793,7 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), DwarfAbbrevSectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0)); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); emitDIE(Die); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); @@ -1839,14 +1839,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(0); Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0) + 1); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1); Asm->OutStreamer.AddComment("DW_LNE_set_address"); Asm->EmitInt8(dwarf::DW_LNE_set_address); Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize(0), + Asm->getDataLayout().getPointerSize(), 0/*AddrSpace*/); // Mark end of matrix. @@ -2075,7 +2075,7 @@ void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. 
Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(0); + unsigned char Size = Asm->getDataLayout().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; for (SmallVector<DotDebugLocEntry, 4>::iterator @@ -2172,7 +2172,7 @@ void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(0); + unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVector<const MCSymbol *, 8>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { @@ -2230,7 +2230,7 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Dwarf Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0)); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { @@ -2261,7 +2261,7 @@ void DwarfDebug::emitDebugInlineInfo() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize(0),0); + Asm->getDataLayout().getPointerSize(),0); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 5508674bc9..475c6f86d9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -21,9 +21,9 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" @@ -231,7 +231,7 @@ class DwarfDebug { /// SectionMap - Provides a unique id per text section. /// - UniqueVector<const MCSection*> SectionMap; + SetVector<const MCSection*> SectionMap; /// CurrentFnArguments - List of Arguments (DbgValues) for current function. SmallVector<DbgVariable *, 8> CurrentFnArguments; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 31d07141a1..08fb6b3f52 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() { // that we're omitting that bit. TTypeEncoding = dwarf::DW_EH_PE_omit; // dwarf::DW_EH_PE_absptr - TypeFormatSize = Asm->getDataLayout().getPointerSize(0); + TypeFormatSize = Asm->getDataLayout().getPointerSize(); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to // pick a type encoding for them. We're about to emit a list of pointers to diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index d0e27d1d04..f7c011968c 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(0); + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(getModule(), AP, "code_end"); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 8de541de3d..f4755bb163 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -388,9 +388,16 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { const TargetFrameLowering *TFI = TM->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); - for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), - RE = FI->roots_end(); RI != RE; ++RI) - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. + if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + ++RI; + } + } } bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 35f9e270dd..b7c9f17df9 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,21 +155,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) { Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context, 0), (Type *)0); + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - TD.getIntPtrType(Context, 0), (Type *)0); + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), - TD.getIntPtrType(Context, 0), (Type *)0); + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -497,7 +497,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -508,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -519,7 +519,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - IntegerType *IntPtr = TD.getIntPtrType(CI->getArgOperand(0)->getType()); + Type *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 2e4496db00..34b24b6085 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -145,7 +145,8 @@ MachineBasicBlock::iterator 
MachineBasicBlock::getFirstNonPHI() { instr_iterator I = instr_begin(), E = instr_end(); while (I != E && I->isPHI()) ++I; - assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); + assert((I == E || !I->isInsideBundle()) && + "First non-phi MI cannot be inside a bundle!"); return I; } @@ -156,7 +157,7 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { ++I; // FIXME: This needs to change if we wish to bundle labels / dbg_values // inside the bundle. - assert(!I->isInsideBundle() && + assert((I == E || !I->isInsideBundle()) && "First non-phi / non-label instruction is inside a bundle!"); return I; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index f11785070b..91d5211857 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -550,7 +550,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { // address of a block, in which case it is the pointer size. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerSize(0); + return TD.getPointerSize(); case MachineJumpTableInfo::EK_GPRel64BlockAddress: return 8; case MachineJumpTableInfo::EK_GPRel32BlockAddress: @@ -570,7 +570,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { // alignment. switch (getEntryKind()) { case MachineJumpTableInfo::EK_BlockAddress: - return TD.getPointerABIAlignment(0); + return TD.getPointerABIAlignment(); case MachineJumpTableInfo::EK_GPRel64BlockAddress: return TD.getABIIntegerTypeAlignment(64); case MachineJumpTableInfo::EK_GPRel32BlockAddress: diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e8885761db..ce8d52000b 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1015,9 +1015,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, unsigned MachineInstr::getBundleSize() const { assert(isBundle() && "Expecting a bundle"); - MachineBasicBlock::const_instr_iterator I = *this; + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); unsigned Size = 0; - while ((++I)->isInsideBundle()) { + while ((++I != E) && I->isInsideBundle()) { ++Size; } assert(Size > 1 && "Malformed bundle"); diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index dca68da2f3..69a3ae84ec 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -707,8 +707,9 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { report("Asm string must be an external symbol", MI); if (!MI->getOperand(1).isImm()) report("Asm flags must be an immediate", MI); - // Allowed flags are Extra_HasSideEffects = 1, and Extra_IsAlignStack = 2. - if (!isUInt<2>(MI->getOperand(1).getImm())) + // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. 
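// Illustrative expansion of the five extra-info bits listed above (flag
// values as stated in the comment; the exact header providing the InlineAsm
// enumerators is an assumption of this sketch):
//
//   unsigned ExtraInfo = InlineAsm::Extra_HasSideEffects  // 1
//                      | InlineAsm::Extra_IsAlignStack    // 2
//                      | InlineAsm::Extra_AsmDialect      // 4
//                      | InlineAsm::Extra_MayLoad         // 8
//                      | InlineAsm::Extra_MayStore;       // 16
//   assert(ExtraInfo == 31 && isUInt<5>(ExtraInfo));      // fits in 5 bits
//
// The widened check below enforces exactly this range: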
+ if (!isUInt<5>(MI->getOperand(1).getImm())) report("Unknown asm flags", &MI->getOperand(1), 1); assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed"); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index d6ed36ef95..e426fe23c0 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -175,7 +175,7 @@ namespace { unsigned VirtReg, unsigned Hint); LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void spillAll(MachineInstr *MI); + void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); void addRetOperands(MachineBasicBlock *MBB); }; @@ -314,7 +314,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, } /// spillAll - Spill all dirty virtregs without killing them. -void RAFast::spillAll(MachineInstr *MI) { +void RAFast::spillAll(MachineBasicBlock::iterator MI) { if (LiveVirtRegs.empty()) return; isBulkSpilling = true; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 9320993d90..02ebce7a11 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -118,7 +118,6 @@ private: typedef std::vector<AllowedSet> AllowedSetMap; typedef std::pair<unsigned, unsigned> RegPair; typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; - typedef std::vector<PBQP::Graph::NodeItr> NodeVector; typedef std::set<unsigned> RegSet; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2ca67d6325..e47a677b77 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -198,12 +198,6 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", char RegisterCoalescer::ID = 0; -static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { - if (!a) return b; - if (!b) return a; - return tri.composeSubRegIndices(a, b); -} - static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, unsigned &Src, unsigned &Dst, unsigned &SrcSub, unsigned &DstSub) { @@ -214,8 +208,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, SrcSub = MI->getOperand(1).getSubReg(); } else if (MI->isSubregToReg()) { Dst = MI->getOperand(0).getReg(); - DstSub = compose(tri, MI->getOperand(0).getSubReg(), - MI->getOperand(3).getImm()); + DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); } else @@ -354,7 +348,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); + return TRI.composeSubRegIndices(SrcIdx, SrcSub) == + TRI.composeSubRegIndices(DstIdx, DstSub); } } @@ -430,7 +425,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); - if (!CP.isCoalescable(ACopyMI)) + // Don't allow any partial copies, even if isCoalescable() allows them. + if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; // Get the LiveRange in IntB that this value number starts with. 
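// Note on the static compose() helper deleted above: it special-cased a null
// sub-register index as the identity. The direct calls to
// TargetRegisterInfo::composeSubRegIndices() rely on the target-independent
// implementation providing the same behavior; a hypothetical spot check,
// not from the patch:
//
//   assert(TRI.composeSubRegIndices(0, Idx) == Idx && "null LHS is identity");
//   assert(TRI.composeSubRegIndices(Idx, 0) == Idx && "null RHS is identity");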
@@ -1314,7 +1310,8 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) continue; - L |= TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg())); + L |= TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); if (MO->readsReg()) Redef = true; } @@ -1492,6 +1489,20 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { if ((V.WriteLanes & OtherV.ValidLanes) == 0) return CR_Replace; + // If the other live range is killed by DefMI and the live ranges are still + // overlapping, it must be because we're looking at an early clobber def: + // + // %dst<def,early-clobber> = ASM %src<kill> + // + // In this case, it is illegal to merge the two live ranges since the early + // clobber def would clobber %src before it was read. + if (OtherLRQ.isKill()) { + // This case where the def doesn't overlap the kill is handled above. + assert(VNI->def.isEarlyClobber() && + "Only early clobber defs can overlap a kill"); + return CR_Impossible; + } + // VNI is clobbering live lanes in OtherVNI, but there is still the // possibility that no instructions actually read the clobbered lanes. // If we're clobbering all the lanes in OtherVNI, at least one must be read. @@ -1632,8 +1643,8 @@ bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, continue; if (!MO->readsReg()) continue; - if (Lanes & - TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg()))) + if (Lanes & TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) return true; } return false; diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 8dcbf83353..496473d3a4 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -71,7 +71,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { // object. We don't have to worry about the case where the // object address is somehow being computed by the multiply, // because our callers only care when the result is an - // identifibale object. + // identifiable object. if (U->getOpcode() != Instruction::Add || (!isa<ConstantInt>(U->getOperand(1)) && Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0a85179293..8f469ae2b5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -393,10 +393,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { - // No compile time optimizations on this type. - if (Op.getValueType() == MVT::ppcf128) - return 0; - // fneg is removable even if it has multiple uses. 
   if (Op.getOpcode() == ISD::FNEG) return 2;
 
@@ -5705,7 +5701,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
   }
 
   // fold (fadd c1, c2) -> c1 + c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -5733,6 +5729,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                        DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                    N0.getOperand(1), N1));
 
+  // If allowed, fold (fadd (fneg x), x) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
+  // If allowed, fold (fadd x, (fneg x)) -> 0.0
+  if (DAG.getTarget().Options.UnsafeFPMath &&
+      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) {
+    return DAG.getConstantFP(0.0, VT);
+  }
+
   // In unsafe math mode, we can fold chains of FADD's of the same value
   // into multiplications. This transform is not safe in general because
   // we are reducing the number of rounding steps.
@@ -5892,7 +5900,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   }
 
   // fold (fsub c1, c2) -> c1-c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
   // fold (fsub A, 0) -> A
   if (DAG.getTarget().Options.UnsafeFPMath &&
@@ -5984,7 +5992,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   }
 
   // fold (fmul c1, c2) -> c1*c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
   // canonicalize constant to RHS
   if (N0CFP && !N1CFP)
@@ -6042,6 +6050,12 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
   EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    if (N0CFP && N0CFP->isZero())
+      return N2;
+    if (N1CFP && N1CFP->isZero())
+      return N2;
+  }
   if (N0CFP && N0CFP->isExactlyValue(1.0))
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
   if (N1CFP && N1CFP->isExactlyValue(1.0))
@@ -6121,11 +6135,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   }
 
   // fold (fdiv c1, c2) -> c1/c2
-  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+  if (N0CFP && N1CFP)
     return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
 
   // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-  if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+  if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
     // Compute the reciprocal 1.0 / c2.
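// Why the folds added above are gated on UnsafeFPMath: they are not
// value-preserving for all IEEE inputs. A standalone illustration in plain
// C++ (not part of the patch):
//
//   #include <cmath>
//   #include <cstdio>
//   int main() {
//     double Inf = INFINITY;
//     std::printf("%f\n", Inf + (-Inf));            // nan, not the folded 0.0
//     std::printf("%f\n", std::fma(0.0, Inf, 1.0)); // nan, not the folded 1.0
//     double x = 3.0;  // and for the reciprocal fold below:
//     std::printf("%d\n", x / 10.0 == x * (1.0 / 10.0)); // 0: one ulp apart
//   }
//
// The in-tree code computes that reciprocal with APFloat: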
APFloat N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 @@ -6168,7 +6182,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP && VT != MVT::ppcf128) + if (N0CFP && N1CFP) return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); return SDValue(); @@ -6181,7 +6195,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold + if (N0CFP && N1CFP) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); if (N1CFP) { @@ -6231,7 +6245,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6288,7 +6302,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128 && + if (N0C && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -6343,7 +6357,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6356,7 +6370,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP && N0.getValueType() != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x @@ -6410,7 +6424,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the @@ -6497,7 +6511,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6509,7 +6523,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6521,7 +6535,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); return SDValue(); @@ -6538,7 +6552,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } // fold (fabs c1) -> fabs(c1) - if (N0CFP && VT != MVT::ppcf128) + if (N0CFP) return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) @@ -9403,34 +9417,38 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); // Get a SetCC of the condition - // FIXME: Should probably make sure that setcc is legal if we ever have a - // target where it isn't. 
- SDValue Temp, SCC; - // cast from setcc result type to select result type - if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), - N0, N1, CC); - if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); - else + // NOTE: Don't create a SETCC if it's not legal on this target. + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, + LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), N2.getValueType(), SCC); - } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), - N2.getValueType(), SCC); - } + } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorkList(SCC.getNode()); + AddToWorkList(Temp.getNode()); - if (N2C->getAPIntValue() == 1) - return Temp; + if (N2C->getAPIntValue() == 1) + return Temp; - // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + // shl setcc result by log2 n2c + return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); + } } // Check to see if this is the equivalent of setcc diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 2ddc07cc63..4854cf7b26 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -101,7 +101,8 @@ bool FastISel::hasTrivialKill(const Value *V) const { // No-op casts are trivially coalesced by fast-isel. if (const CastInst *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(TD) && !hasTrivialKill(Cast->getOperand(0))) + if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + !hasTrivialKill(Cast->getOperand(0))) return false; // GEPs with all zero indices are trivially coalesced by fast-isel. @@ -174,7 +175,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. Reg = - getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getType()))); + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) { Reg = TargetMaterializeFloatZero(CF); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index d1baa3f716..a8381b25ba 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -897,7 +897,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - // Add the HasSideEffect and isAlignStack bits. 
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. int64_t ExtraInfo = cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d661971bb8..f000ce38d3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -91,11 +91,6 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); - // PPC long double cannot be converted to any other type. - if (VT == MVT::ppcf128 || - &Val.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; @@ -1612,10 +1607,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { - // No compile time operations on this type yet. - if (N1C->getValueType(0) == MVT::ppcf128) - return SDValue(); - APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2447,8 +2438,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - // No compile time operations on ppcf128. - if (VT == MVT::ppcf128) break; APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, @@ -2477,61 +2466,59 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, // Constant fold unary operations with a floating point constant operand. if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { APFloat V = C->getValueAPF(); // make copy - if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) { - switch (Opcode) { - case ISD::FNEG: - V.changeSign(); + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FABS: - V.clearSign(); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - case ISD::FCEIL: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FTRUNC: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FFLOOR: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, VT); - break; - } - case ISD::FP_EXTEND: { - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. 
- (void)V.convert(*EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) return getConstantFP(V, VT); - } - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: { - integerPart x[2]; - bool ignored; - assert(integerPartWidth >= 64); - // FIXME need to be more flexible about rounding mode. - APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), - Opcode==ISD::FP_TO_SINT, - APFloat::rmTowardZero, &ignored); - if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual - break; - APInt api(VT.getSizeInBits(), x); - return getConstant(api, VT); - } - case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); - else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + assert(integerPartWidth >= 64); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual break; - } + APInt api(VT.getSizeInBits(), x); + return getConstant(api, VT); + } + case ISD::BITCAST: + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), VT); + break; } } @@ -3052,7 +3039,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Cannonicalize constant to RHS if commutative std::swap(N1CFP, N2CFP); std::swap(N1, N2); - } else if (N2CFP && VT != MVT::ppcf128) { + } else if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3449,12 +3436,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); - Type *vtType = VT.isExtended() ? VT.getTypeForEVT(*DAG.getContext()) : NULL; - unsigned AS = (vtType && vtType->isPointerTy()) ? - cast<PointerType>(vtType)->getAddressSpace() : 0; if (VT == MVT::Other) { - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() || TLI.allowsUnalignedMemoryAccesses(VT)) { VT = TLI.getPointerTy(); } else { @@ -3804,8 +3788,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
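// When no target-specific lowering applies, the node becomes a call matching
// the C prototype, with every argument modeled as the data layout's
// integer-pointer type (sketch of the types assembled below; illustrative):
//
//   // void *memcpy(void *dst, const void *src, size_t n);
//   Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
//   // Dst, Src and Size are all passed with Entry.Ty == IntPtrTy.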
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
-  unsigned AS = SrcPtrInfo.getAddrSpace();
-  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
+  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
  Entry.Node = Dst; Args.push_back(Entry);
  Entry.Node = Src; Args.push_back(Entry);
  Entry.Node = Size; Args.push_back(Entry);
@@ -3860,8 +3843,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
   // Emit a library call.
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-  unsigned AS = SrcPtrInfo.getAddrSpace();
-  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
+  Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext());
   Entry.Node = Dst; Args.push_back(Entry);
   Entry.Node = Src; Args.push_back(Entry);
   Entry.Node = Size; Args.push_back(Entry);
@@ -3910,8 +3892,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
     return Result;
 
   // Emit a library call.
-  unsigned AS = DstPtrInfo.getAddrSpace();
-  Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext(), AS);
+  Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext());
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
   Entry.Node = Dst; Entry.Ty = IntPtrTy;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6c9d001a1f..be3168618e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1255,7 +1255,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       for (unsigned i = 0; i < NumParts; ++i) {
         Outs.push_back(ISD::OutputArg(Flags,
                                       Parts[i].getValueType(),
-                                      /*isfixed=*/true));
+                                      /*isfixed=*/true, 0, 0));
         OutVals.push_back(Parts[i]);
       }
     }
@@ -6150,7 +6150,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
   AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
 
-  // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3.
+  // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // bits as operand 3.
   unsigned ExtraInfo = 0;
   if (IA->hasSideEffects())
     ExtraInfo |= InlineAsm::Extra_HasSideEffects;
   if (IA->isAlignStack())
     ExtraInfo |= InlineAsm::Extra_IsAlignStack;
   // Set the asm dialect.
   ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+  // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an 'other' constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set
+    // MayLoad/MayStore for 'other' constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + } + } + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, TLI.getPointerTy())); @@ -6543,7 +6565,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < CLI.NumFixedArgs); + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5abc55ba8e..49f55e2fc6 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1032,7 +1032,7 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); } } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4d30f04598..6df4a0aa2a 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -93,9 +93,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(0); + unsigned Size = TM.getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment(0)); + Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index df33a94ca7..a9058bc7f6 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -60,116 +60,108 @@ STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { - class TwoAddressInstructionPass : public MachineFunctionPass { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; - MachineRegisterInfo *MRI; - LiveVariables *LV; - SlotIndexes *Indexes; - LiveIntervals *LIS; - AliasAnalysis *AA; - CodeGenOpt::Level OptLevel; - - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. - DenseMap<MachineInstr*, unsigned> DistanceMap; - - // SrcRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies from physical - // registers to virtual registers. e.g. v1024 = move r0. - DenseMap<unsigned, unsigned> SrcRegMap; - - // DstRegMap - A map from virtual registers to physical registers which - // are likely targets to be coalesced to due to copies to physical - // registers from virtual registers. e.g. r1 = move v1024. 
-    DenseMap<unsigned, unsigned> DstRegMap;
-
-    /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
-    /// during the initial walk of the machine function.
-    SmallVector<MachineInstr*, 16> RegSequences;
-
-    bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
-                              unsigned Reg,
-                              MachineBasicBlock::iterator OldPos);
-
-    bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
-                           unsigned &LastDef);
-
-    bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
-                               MachineInstr *MI, MachineBasicBlock *MBB,
-                               unsigned Dist);
+class TwoAddressInstructionPass : public MachineFunctionPass {
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const InstrItineraryData *InstrItins;
+  MachineRegisterInfo *MRI;
+  LiveVariables *LV;
+  SlotIndexes *Indexes;
+  LiveIntervals *LIS;
+  AliasAnalysis *AA;
+  CodeGenOpt::Level OptLevel;
+
+  // The current basic block being processed.
+  MachineBasicBlock *MBB;
+
+  // DistanceMap - Keep track of the distance of a MI from the start of the
+  // current basic block.
+  DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+  // Set of already processed instructions in the current block.
+  SmallPtrSet<MachineInstr*, 8> Processed;
 
-    bool CommuteInstruction(MachineBasicBlock::iterator &mi,
-                            MachineFunction::iterator &mbbi,
-                            unsigned RegB, unsigned RegC, unsigned Dist);
+  // SrcRegMap - A map from virtual registers to physical registers which are
+  // likely targets to be coalesced to due to copies from physical registers to
+  // virtual registers. e.g. v1024 = move r0.
+  DenseMap<unsigned, unsigned> SrcRegMap;
 
-    bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+  // DstRegMap - A map from virtual registers to physical registers which are
+  // likely targets to be coalesced to due to copies to physical registers from
+  // virtual registers. e.g. r1 = move v1024.
+  DenseMap<unsigned, unsigned> DstRegMap;
 
-    bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
-                            MachineBasicBlock::iterator &nmi,
-                            MachineFunction::iterator &mbbi,
-                            unsigned RegA, unsigned RegB, unsigned Dist);
+  /// RegSequences - Keep track of the list of REG_SEQUENCE instructions seen
+  /// during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences; - bool isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, MachineBasicBlock *MBB); + bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, + MachineBasicBlock::iterator OldPos); - bool RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); - bool RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg); + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); - bool TryInstructionTransform(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist); - void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist); - void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); - typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; - typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; - bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); - void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, unsigned Dist); - void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); + bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); - /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part - /// of the de-ssa process. This replaces sources of REG_SEQUENCE as - /// sub-register references of the register defined by REG_SEQUENCE. - bool EliminateRegSequences(); + bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); - public: - static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) { - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + bool tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); - AU.addPreserved<SlotIndexes>(); - AU.addPreserved<LiveIntervals>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } + void scanUses(unsigned DstReg); - /// runOnMachineFunction - Pass entry point. 
- bool runOnMachineFunction(MachineFunction&); - }; -} + void processCopy(MachineInstr *MI); + + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + + /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of + /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register + /// references of the register defined by REG_SEQUENCE. + bool eliminateRegSequences(); + +public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - Pass entry point. + bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", @@ -180,13 +172,13 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; -/// Sink3AddrInstruction - A two-address instruction has been converted to a +/// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce /// register pressure. -bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, - MachineInstr *MI, unsigned SavedReg, - MachineBasicBlock::iterator OldPos) { +bool TwoAddressInstructionPass:: +sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { // FIXME: Shouldn't we be trying to do this before we three-addressify the // instruction? After this transformation is done, we no longer need // the instruction to be in three-address form. @@ -299,13 +291,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, return true; } -/// NoUseAfterLastDef - Return true if there are no intervening uses between the +/// noUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last /// def location by reference -bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, - MachineBasicBlock *MBB, unsigned Dist, - unsigned &LastDef) { +bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, + unsigned &LastDef) { LastDef = 0; unsigned LastUse = Dist; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), @@ -465,10 +456,9 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { /// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. 
bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, - unsigned regC, - MachineInstr *MI, MachineBasicBlock *MBB, - unsigned Dist) { +TwoAddressInstructionPass:: +isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -516,13 +506,13 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. unsigned LastDefC = 0; - if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC)) + if (!noUseAfterLastDef(regC, Dist, LastDefC)) return false; // If there is a use of regB between its last def (could be livein) and this // instruction, then go ahead and make this transformation. unsigned LastDefB = 0; - if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB)) + if (!noUseAfterLastDef(regB, Dist, LastDefB)) return true; // Since there are no intervening uses for both registers, then commute @@ -530,13 +520,12 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// CommuteInstruction - Commute a two-address instruction and update the basic +/// commuteInstruction - Commute a two-address instruction and update the basic /// block, distance map, and live variables if needed. Return true if it is /// successful. -bool -TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, - MachineFunction::iterator &mbbi, - unsigned RegB, unsigned RegC, unsigned Dist) { +bool TwoAddressInstructionPass:: +commuteInstruction(MachineBasicBlock::iterator &mi, + unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); @@ -555,8 +544,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (Indexes) Indexes->replaceMachineInstrInMaps(MI, NewMI); - mbbi->insert(mi, NewMI); // Insert the new inst - mbbi->erase(mi); // Nuke the old inst. + MBB->insert(mi, NewMI); // Insert the new inst + MBB->erase(mi); // Nuke the old inst. mi = NewMI; DistanceMap.insert(std::make_pair(NewMI, Dist)); } @@ -588,51 +577,51 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// ConvertInstTo3Addr - Convert the specified two-address instruction into a +/// convertInstTo3Addr - Convert the specified two-address instruction into a /// three address one. Return true if this transformation was successful. bool -TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, +TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist) { - MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); - if (NewMI) { - DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); - DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); - bool Sunk = false; + // FIXME: Why does convertToThreeAddress() need an iterator reference? 
+ MachineFunction::iterator MFI = MBB; + MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); + assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + if (!NewMI) + return false; - if (Indexes) - Indexes->replaceMachineInstrInMaps(mi, NewMI); + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + bool Sunk = false; - if (NewMI->findRegisterUseOperand(RegB, false, TRI)) - // FIXME: Temporary workaround. If the new instruction doesn't - // uses RegB, convertToThreeAddress must have created more - // then one instruction. - Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi); + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); - mbbi->erase(mi); // Nuke the old inst. + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) + // FIXME: Temporary workaround. If the new instruction doesn't + // use RegB, convertToThreeAddress must have created more + // than one instruction. + Sunk = sink3AddrInstruction(NewMI, RegB, mi); - if (!Sunk) { - DistanceMap.insert(std::make_pair(NewMI, Dist)); - mi = NewMI; - nmi = llvm::next(mi); - } + MBB->erase(mi); // Nuke the old inst. - // Update source and destination register maps. - SrcRegMap.erase(RegA); - DstRegMap.erase(RegB); - return true; + if (!Sunk) { + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = llvm::next(mi); } - return false; + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); + return true; } -/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// scanUses - Scan forward recursively for only uses, update maps if the use /// is a copy or a two-address instruction. void -TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed) { +TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector<unsigned, 4> VirtRegPairs; bool IsDstPhys; bool IsCopy = false; @@ -676,7 +665,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, } } -/// ProcessCopy - If the specified instruction is not yet processed, process it +/// processCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. @@ -688,9 +677,7 @@ TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, /// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is /// potentially joined with r1 on the output side. It's worthwhile to commute /// 'add' to eliminate a copy. -void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, - MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &Processed) { +void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { if (Processed.count(MI)) return; @@ -707,21 +694,20 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - ScanUses(DstReg, MBB, Processed); + scanUses(DstReg); } Processed.insert(MI); return; } -/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// rescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg', consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy.
-bool -TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. if (!LV) @@ -853,8 +839,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, /// isDefTooClose - Return true if the re-scheduling will put the given /// instruction too close to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, - MachineInstr *MI, - MachineBasicBlock *MBB) { + MachineInstr *MI) { for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DefMI = &*DI; @@ -873,15 +858,14 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// rescheduleKillAboveMI - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg', consider moving the kill instruction above the /// current two-address instruction in order to eliminate the need for the /// copy. -bool -TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - unsigned Reg) { +bool TwoAddressInstructionPass:: +rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { // Bail immediately if we don't have LV available. We use it to find kills // efficiently. if (!LV) @@ -918,7 +902,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (MO.isUse()) { if (!MOReg) continue; - if (isDefTooClose(MOReg, DI->second, MI, MBB)) + if (isDefTooClose(MOReg, DI->second, MI)) return false; if (MOReg == Reg && !MO.isKill()) return false; @@ -1006,18 +990,16 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, return true; } -/// TryInstructionTransform - For the case where an instruction has a single +/// tryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for /// coalescing away the register copy. Returns true if no copy needs to be /// inserted to untie mi's operands (either because they were untied, or /// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: -TryInstructionTransform(MachineBasicBlock::iterator &mi, +tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist, - SmallPtrSet<MachineInstr*, 8> &Processed) { + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { if (OptLevel == CodeGenOpt::None) return false; @@ -1030,7 +1012,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, bool regBKilled = isKilled(MI, regB, MRI, TII); if (TargetRegisterInfo::isVirtualRegister(regA)) - ScanUses(regA, &*mbbi, Processed); + scanUses(regA); // Check if it is profitable to commute the operands. unsigned SrcOp1, SrcOp2; @@ -1051,7 +1033,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) { + else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -1059,7 +1041,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } // If it's profitable to commute, try to do so. - if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1068,7 +1050,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + if (rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1078,7 +1060,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { + if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } @@ -1087,7 +1069,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. - if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + if (rescheduleKillAboveMI(mi, nmi, regB)) { ++NumReSchedUps; return true; } @@ -1131,8 +1113,8 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Tentatively insert the instructions into the block so that they // look "normal" to the transformation logic. - mbbi->insert(mi, NewMIs[0]); - mbbi->insert(mi, NewMIs[1]); + MBB->insert(mi, NewMIs[0]); + MBB->insert(mi, NewMIs[1]); DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1142,8 +1124,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = - TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist, Processed); + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. 
Keep the unfolded @@ -1378,16 +1359,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MRI->leaveSSA(); TiedOperandMap TiedOperands; - - SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); - mbbi != mbbe; ++mbbi) { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MBB = MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); - for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); if (mi->isDebugValue()) { @@ -1401,7 +1381,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { DistanceMap.insert(std::make_pair(mi, ++Dist)); - ProcessCopy(&*mi, &*mbbi, Processed); + processCopy(&*mi); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. @@ -1426,8 +1406,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && - TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) { + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) { // The tied operands have been eliminated or shifted further down the // block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); @@ -1467,7 +1446,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve // SSA form. It's now safe to de-SSA. - MadeChange |= EliminateRegSequences(); + MadeChange |= eliminateRegSequences(); return MadeChange; } @@ -1514,127 +1493,6 @@ static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { return First; } -/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are -/// EXTRACT_SUBREG from the same register and to the same virtual register -/// with different sub-register indices, attempt to combine the -/// EXTRACT_SUBREGs and pre-coalesce them. e.g. -/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0 -/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6 -/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5 -/// Since D subregs 5, 6 can combine to a Q register, we can coalesce -/// reg1026 to reg1029. -void -TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, - unsigned DstReg) { - SmallSet<unsigned, 4> Seen; - for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { - unsigned SrcReg = Srcs[i]; - if (!Seen.insert(SrcReg)) - continue; - - // Check that the instructions are all in the same basic block. - MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg); - MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg); - if (!SrcDefMI || !DstDefMI || - SrcDefMI->getParent() != DstDefMI->getParent()) - continue; - - // If there are no other uses than copies which feed into - // the reg_sequence, then we might be able to coalesce them.
- bool CanCoalesce = true; - SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { - CanCoalesce = false; - break; - } - SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); - DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); - } - - if (!CanCoalesce || SrcSubIndices.size() < 2) - continue; - - // Check that the source subregisters can be combined. - std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); - unsigned NewSrcSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, - NewSrcSubIdx)) - continue; - - // Check that the destination subregisters can also be combined. - std::sort(DstSubIndices.begin(), DstSubIndices.end()); - unsigned NewDstSubIdx = 0; - if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, - NewDstSubIdx)) - continue; - - // If neither source nor destination can be combined to the full register, - // just give up. This could be improved if it ever matters. - if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) - continue; - - // Now that we know that all the uses are extract_subregs and that those - // subregs can somehow be combined, scan all the extract_subregs again to - // make sure the subregs are in the right order and can be composed. - MachineInstr *SomeMI = 0; - CanCoalesce = true; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - assert(UseMI->isCopy()); - unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); - unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); - assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); - if ((NewDstSubIdx == 0 && - TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || - (NewSrcSubIdx == 0 && - TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { - CanCoalesce = false; - break; - } - // Keep track of one of the uses. Preferably the first one which has a - // <def,undef> flag. - if (!SomeMI || UseMI->getOperand(0).isUndef()) - SomeMI = UseMI; - } - if (!CanCoalesce) - continue; - - // Insert a copy to replace the original. - MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, - SomeMI->getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define | - getUndefRegState(SomeMI->getOperand(0).isUndef()), - NewDstSubIdx) - .addReg(SrcReg, 0, NewSrcSubIdx); - - // Remove all the old extract instructions. - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SrcReg), - UE = MRI->use_nodbg_end(); UI != UE; ) { - MachineInstr *UseMI = &*UI; - ++UI; - if (UseMI == CopyMI) - continue; - assert(UseMI->isCopy()); - // Move any kills to the new copy or extract instruction. 
- if (UseMI->getOperand(1).isKill()) { - CopyMI->getOperand(1).setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); - } - UseMI->eraseFromParent(); - } - } -} - static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, MachineRegisterInfo *MRI) { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), @@ -1646,7 +1504,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, return false; } -/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part /// of the de-ssa process. This replaces sources of REG_SEQUENCE as /// sub-register references of the register defined by REG_SEQUENCE. e.g. /// @@ -1654,7 +1512,7 @@ static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, /// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6 /// => /// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ... -bool TwoAddressInstructionPass::EliminateRegSequences() { +bool TwoAddressInstructionPass::eliminateRegSequences() { if (RegSequences.empty()) return false; @@ -1770,12 +1628,6 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); } - - // Try coalescing some EXTRACT_SUBREG instructions. This can create - // INSERT_SUBREG instructions that must have <undef> flags added by - // LiveIntervalAnalysis, so only run it when LiveVariables is available. - if (LV) - CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 1bfd126a12..ab67464453 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -341,7 +341,7 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, else debug_info_data.getU64(offset_ptr); break; - + default: *offset_ptr = offset; return false; @@ -411,9 +411,10 @@ DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, const char* DWARFDebugInfoEntryMinimal::getAttributeValueAsString( - const DWARFCompileUnit* cu, - const uint16_t attr, - const char* fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + const char* fail_value) + const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) { DataExtractor stringExtractor(cu->getContext().getStringSection(), @@ -425,9 +426,9 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsString( uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getUnsigned(); @@ -436,9 +437,9 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( int64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - int64_t fail_value) const { + const DWARFCompileUnit* cu, + const uint16_t attr, + int64_t fail_value) const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getSigned(); @@ -447,9 +448,10 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) const { + const 
DWARFCompileUnit* cu, + const uint16_t attr, + uint64_t fail_value) + const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) return form_value.getReference(cu); @@ -457,7 +459,8 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( } bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU, - uint64_t &LowPC, uint64_t &HighPC) const { + uint64_t &LowPC, + uint64_t &HighPC) const { HighPC = -1ULL; LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL); if (LowPC != -1ULL) @@ -488,7 +491,9 @@ DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU, bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( - const DWARFCompileUnit *CU, const uint64_t Address) const { + const DWARFCompileUnit *CU, + const uint64_t Address) + const { if (isNULL()) return false; uint64_t LowPC, HighPC; @@ -505,8 +510,8 @@ DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( } const char* -DWARFDebugInfoEntryMinimal::getSubroutineName( - const DWARFCompileUnit *CU) const { +DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFCompileUnit *CU) + const { if (!isSubroutineDIE()) return 0; // Try to get mangled name if possible. @@ -540,9 +545,10 @@ DWARFDebugInfoEntryMinimal::getSubroutineName( return 0; } -void DWARFDebugInfoEntryMinimal::getCallerFrame( - const DWARFCompileUnit *CU, uint32_t &CallFile, uint32_t &CallLine, - uint32_t &CallColumn) const { +void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFCompileUnit *CU, + uint32_t &CallFile, + uint32_t &CallLine, + uint32_t &CallColumn) const { CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0); CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0); CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0); @@ -550,7 +556,9 @@ void DWARFDebugInfoEntryMinimal::getCallerFrame( DWARFDebugInfoEntryMinimal::InlinedChain DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( - const DWARFCompileUnit *CU, const uint64_t Address) const { + const DWARFCompileUnit *CU, + const uint64_t Address) + const { DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain; if (isNULL()) return InlinedChain; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 99f6ec691a..05987f2b74 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -17,7 +17,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ADT/SmallString.h" @@ -268,7 +267,7 @@ public: void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, const std::vector<std::string> &InputArgv) { clear(); // Free the old contents. - unsigned PtrSize = EE->getDataLayout()->getPointerSize(0); + unsigned PtrSize = EE->getDataLayout()->getPointerSize(); Array = new char[(InputArgv.size()+1)*PtrSize]; DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); @@ -343,7 +342,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { #ifndef NDEBUG /// isTargetNullPtr - Return whether the target pointer stored at Loc is null. 
static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { - unsigned PtrSize = EE->getDataLayout()->getPointerSize(0); + unsigned PtrSize = EE->getDataLayout()->getPointerSize(); for (unsigned i = 0; i < PtrSize; ++i) if (*(i + (uint8_t*)Loc)) return false; @@ -645,19 +644,17 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::PtrToInt: { GenericValue GV = getConstantValue(Op0); - assert(CE->getOperand(1)->getType()->isPointerTy() && - "Must be a pointer type!"); - uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getOperand(1)->getType()); + uint32_t PtrWidth = TD->getTypeSizeInBits(Op0->getType()); + assert(PtrWidth <= 64 && "Bad pointer width"); GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); + uint32_t IntWidth = TD->getTypeSizeInBits(CE->getType()); + GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth); return GV; } case Instruction::IntToPtr: { GenericValue GV = getConstantValue(Op0); - assert(CE->getOperand(1)->getType()->isPointerTy() && - "Must be a pointer type!"); uint32_t PtrWidth = TD->getTypeSizeInBits(CE->getType()); - if (PtrWidth != GV.IntVal.getBitWidth()) - GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); + GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); return GV; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 326bf79c58..5202b09165 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1054,8 +1054,7 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); - unsigned AS = cast<PointerType>(DstTy)->getAddressSpace(); - uint32_t PtrSize = TD.getPointerSizeInBits(AS); + uint32_t PtrSize = TD.getPointerSizeInBits(); if (PtrSize != Src.IntVal.getBitWidth()) Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index e70efd0886..e3b90fdf78 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -378,7 +378,7 @@ GenericValue lle_X_sprintf(FunctionType *FT, case 'x': case 'X': if (HowLong >= 1) { if (HowLong == 1 && - TheInterpreter->getDataLayout()->getPointerSizeInBits(0) == 64 && + TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 && sizeof(long) < sizeof(int64_t)) { // Make sure we use %lld with a 64 bit argument because we might be // compiling LLI on a 32 bit compiler. 
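The getConstantValue() changes above stop asserting on the operand's type and instead normalize the APInt to the destination type's width with zextOrTrunc, so a ptrtoint to a narrower integer truncates and an inttoptr from a narrower integer zero-extends. A minimal standalone sketch of that width handling, assuming LLVM's APInt header; the values are illustrative, not taken from the patch:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // ptrtoint i8* %p to i32: the 64-bit pointer value is truncated...
  llvm::APInt IntVal(64, 0x00007fffdeadbeefULL);
  assert(IntVal.zextOrTrunc(32).getZExtValue() == 0xdeadbeefULL);
  // ...while inttoptr i32 %v to i8* on a 64-bit target zero-extends,
  // matching the unconditional zextOrTrunc now used in both cases.
  llvm::APInt Small(32, 0x1000);
  assert(Small.zextOrTrunc(64).getZExtValue() == 0x1000);
  return 0;
}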
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index bcd5b26365..19c197903a 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -14,9 +14,7 @@ #include "JIT.h" #include "JITDwarfEmitter.h" -#include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" @@ -68,7 +66,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, void JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, const std::vector<MachineMove> &Moves) const { - unsigned PointerSize = TD->getPointerSize(0); + unsigned PointerSize = TD->getPointerSize(); int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? PointerSize : -PointerSize; MCSymbol *BaseLabel = 0; @@ -380,7 +378,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, for (unsigned i = 0, e = CallSites.size(); i < e; ++i) SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); - unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(0); + unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); unsigned TypeOffset = sizeof(int8_t) + // Call site format // Call-site table length @@ -456,12 +454,12 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, const GlobalVariable *GV = TypeInfos[M - 1]; if (GV) { - if (TD->getPointerSize(GV->getType()->getAddressSpace()) == sizeof(int32_t)) + if (TD->getPointerSize() == sizeof(int32_t)) JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); else JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV)); } else { - if (TD->getPointerSize(0) == sizeof(int32_t)) + if (TD->getPointerSize() == sizeof(int32_t)) JCE->emitInt32(0); else JCE->emitInt64(0); @@ -483,7 +481,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, unsigned char* JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { - unsigned PointerSize = TD->getPointerSize(0); + unsigned PointerSize = TD->getPointerSize(); int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? PointerSize : -PointerSize; @@ -543,7 +541,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const { - unsigned PointerSize = TD->getPointerSize(0); + unsigned PointerSize = TD->getPointerSize(); // EH frame header. unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index fef71768b4..2911a50772 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,4 +1,3 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp - MCJITMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 68c0c34f61..a0ad985145 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" -#include "MCJITMemoryManager.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -46,7 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. 
sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, new MCJITMemoryManager(JMM), GVsWithCode); + return new MCJIT(M, TM, JMM, GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp deleted file mode 100644 index 457fe5e3ef..0000000000 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//==-- MCJITMemoryManager.cpp - Definition for the Memory Manager -*-C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCJITMemoryManager.h" - -using namespace llvm; - -void MCJITMemoryManager::anchor() { } diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h deleted file mode 100644 index 441aaeb5ec..0000000000 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ /dev/null @@ -1,50 +0,0 @@ -//===-- MCJITMemoryManager.h - Definition for the Memory Manager ---C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H -#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H - -#include "llvm/Module.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include <assert.h> - -namespace llvm { - -// The MCJIT memory manager is a layer between the standard JITMemoryManager -// and the RuntimeDyld interface that maps objects, by name, onto their -// matching LLVM IR counterparts in the module(s) being compiled. -class MCJITMemoryManager : public RTDyldMemoryManager { - virtual void anchor(); - OwningPtr<JITMemoryManager> JMM; - -public: - MCJITMemoryManager(JITMemoryManager *jmm) : - JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()) {} - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { - return JMM->allocateDataSection(Size, Alignment, SectionID); - } - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { - return JMM->allocateCodeSection(Size, Alignment, SectionID); - } - - virtual void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) { - return JMM->getPointerToNamedFunction(Name, AbortOnFailure); - } - -}; - -} // End llvm namespace - -#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index ff05c82aec..950b4208a9 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -17,6 +17,7 @@ #include "RuntimeDyldELF.h" #include "RuntimeDyldMachO.h" #include "llvm/Support/Path.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; using namespace llvm::object; @@ -27,16 +28,6 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { -namespace { - // Helper for extensive error checking in debug builds. 
- error_code Check(error_code Err) { - if (Err) { - report_fatal_error(Err.message()); - } - return Err; - } -} // end anonymous namespace - // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { // First, resolve relocations associated with external symbols. @@ -78,9 +69,9 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { // Used sections from the object file ObjSectionToIDMap LocalSections; - // Common symbols requiring allocation, and the total size required to - // allocate all common symbols. + // Common symbols requiring allocation, with their sizes and alignments CommonSymbolMap CommonSymbols; + // Maximum required total memory to allocate all common symbols uint64_t CommonSize = 0; error_code err; @@ -100,10 +91,11 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { bool isCommon = flags & SymbolRef::SF_Common; if (isCommon) { // Add the common symbols to a list. We'll allocate them all below. + uint64_t Align = getCommonSymbolAlignment(*i); uint64_t Size = 0; Check(i->getSize(Size)); - CommonSize += Size; - CommonSymbols[*i] = Size; + CommonSize += Size + Align; + CommonSymbols[*i] = CommonSymbolInfo(Size, Align); } else { if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data || @@ -201,11 +193,20 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, // Assign the address of each symbol for (CommonSymbolMap::const_iterator it = CommonSymbols.begin(), itEnd = CommonSymbols.end(); it != itEnd; it++) { + uint64_t Size = it->second.first; + uint64_t Align = it->second.second; StringRef Name; it->first.getName(Name); + if (Align) { + // This symbol has an alignment requirement. + uint64_t AlignOffset = OffsetToAlignment((uint64_t)Addr, Align); + Addr += AlignOffset; + Offset += AlignOffset; + DEBUG(dbgs() << "Allocating common symbol " << Name << " address " << + format("%p\n", Addr)); + } Obj.updateSymbolAddress(it->first, (uint64_t)Addr); SymbolTable[Name.data()] = SymbolLoc(SectionID, Offset); - uint64_t Size = it->second; Offset += Size; Addr += Size; } @@ -374,7 +375,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { writeInt32BE(Addr+32, 0x7D6903A6); // mtctr r11 writeInt32BE(Addr+36, 0xE96C0010); // ld r11, 16(r2) writeInt32BE(Addr+40, 0x4E800420); // bctr - + return Addr; } return Addr; @@ -403,14 +404,14 @@ void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value) { // Ignore relocations for sections that were not loaded if (Sections[RE.SectionID].Address != 0) { - uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " + " << RE.Offset << " (" + << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")" << " RelType: " << RE.RelType << " Addend: " << RE.Addend << "\n"); - resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, + resolveRelocation(Sections[RE.SectionID], RE.Offset, Value, RE.RelType, RE.Addend); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 1073c6fc52..1ebcaf7ba8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -187,8 +187,8 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { RuntimeDyldELF::~RuntimeDyldELF() { } -void 
RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, +void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { @@ -197,8 +197,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, llvm_unreachable("Relocation type not implemented yet!"); break; case ELF::R_X86_64_64: { - uint64_t *Target = (uint64_t*)(LocalAddress); + uint64_t *Target = reinterpret_cast<uint64_t*>(Section.Address + Offset); *Target = Value + Addend; + DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) + << " at " << format("%p\n",Target)); break; } case ELF::R_X86_64_32: @@ -208,37 +210,52 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); - uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress); + uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset); *Target = TruncatedAddr; + DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) + << " at " << format("%p\n",Target)); break; } case ELF::R_X86_64_PC32: { - uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); + // Get the placeholder value from the generated object since + // a previous relocation attempt may have overwritten the loaded version + uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress + + Offset); + uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress; assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - *Placeholder = TruncOffset; + *Target = TruncOffset; break; } } } -void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, - uint32_t FinalAddress, +void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { switch (Type) { case ELF::R_386_32: { - uint32_t *Target = (uint32_t*)(LocalAddress); - uint32_t Placeholder = *Target; - *Target = Placeholder + Value + Addend; + // Get the placeholder value from the generated object since + // a previous relocation attempt may have overwritten the loaded version + uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress + + Offset); + uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset); + *Target = *Placeholder + Value + Addend; break; } case ELF::R_386_PC32: { - uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); + // Get the placeholder value from the generated object since + // a previous relocation attempt may have overwritten the loaded version + uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress + + Offset); + uint32_t *Target = reinterpret_cast<uint32_t*>(Section.Address + Offset); + uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress; - *Placeholder = RealOffset; + *Target = RealOffset; break; } default: @@ -249,16 +266,18 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, } } -void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress, - uint32_t FinalAddress, +void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, 
int32_t Addend) { // TODO: Add Thumb relocations. - uint32_t* TargetPtr = (uint32_t*)LocalAddress; + uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset); + uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); Value += Addend; - DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress + DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " + << Section.Address + Offset << " FinalAddress: " << format("%p",FinalAddress) << " Value: " << format("%x",Value) << " Type: " << format("%x",Type) @@ -310,16 +329,18 @@ void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress, } } -void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress, - uint32_t FinalAddress, +void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { - uint32_t* TargetPtr = (uint32_t*)LocalAddress; + uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset); Value += Addend; - DEBUG(dbgs() << "resolveMipselocation, LocalAddress: " << LocalAddress - << " FinalAddress: " << format("%p",FinalAddress) + DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: " + << Section.Address + Offset + << " FinalAddress: " + << format("%p",Section.LoadAddress + Offset) << " Value: " << format("%x",Value) << " Type: " << format("%x",Type) << " Addend: " << format("%x",Addend) @@ -467,11 +488,12 @@ uint16_t applyPPChighest (uint64_t value) return (value >> 48) & 0xffff; } -void RuntimeDyldELF::resolvePPC64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, - uint32_t Type, - int64_t Addend) { +void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint8_t* LocalAddress = Section.Address + Offset; switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -495,6 +517,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(uint8_t *LocalAddress, writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; case ELF::R_PPC64_REL24 : { + uint64_t FinalAddress = (Section.LoadAddress + Offset); int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend); if (SignExtend32<24>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); @@ -521,34 +544,34 @@ void RuntimeDyldELF::resolvePPC64Relocation(uint8_t *LocalAddress, } -void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, +void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { switch (Arch) { case Triple::x86_64: - resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend); + resolveX86_64Relocation(Section, Offset, Value, Type, Addend); break; case Triple::x86: - resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + resolveX86Relocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; case Triple::arm: // Fall through. case Triple::thumb: - resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; case Triple::mips: // Fall through.
case Triple::mipsel: - resolveMIPSRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; case Triple::ppc64: - resolvePPC64Relocation(LocalAddress, FinalAddress, Value, Type, Addend); + resolvePPC64Relocation(Section, Offset, Value, Type, Addend); break; default: llvm_unreachable("Unsupported CPU type!"); } @@ -628,13 +651,12 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // This is an ARM branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is an ARM branch relocation."); SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + - i->second, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. @@ -649,8 +671,9 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + - Section.StubOffset, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); Section.StubOffset += getMaxStubSize(); } } else if (Arch == Triple::mipsel && RelType == ELF::R_MIPS_26) { @@ -668,9 +691,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Target, (uint64_t)Target, - (uint64_t)Section.Address + - i->second, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. @@ -695,9 +717,9 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(RELo, Value.SectionID); } - resolveRelocation(Target, (uint64_t)Target, - (uint64_t)Section.Address + - Section.StubOffset, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); Section.StubOffset += getMaxStubSize(); } } else if (Arch == Triple::ppc64) { @@ -731,8 +753,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it - resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address - + i->second, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function.
@@ -770,8 +792,9 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address - + Section.StubOffset, RelType, 0); + resolveRelocation(Section, Rel.Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); if (SymType == SymbolRef::ST_Unknown) // Restore the TOC for external calls writeInt32BE(Target+4, 0xE8410028); // ld r2,40(r1) @@ -796,6 +819,13 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } +unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) { + // In ELF, the value of an SHN_COMMON symbol is its alignment requirement. + uint64_t Align; + Check(Sym.getValue(Align)); + return Align; +} + bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const { if (Buffer->getBufferSize() < strlen(ELF::ElfMagic)) return false; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 6c31f0dc12..07e704b459 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -18,42 +18,52 @@ using namespace llvm; - namespace llvm { + +namespace { + // Helper for extensive error checking in debug builds. + error_code Check(error_code Err) { + if (Err) { + report_fatal_error(Err.message()); + } + return Err; + } +} // end anonymous namespace + class RuntimeDyldELF : public RuntimeDyldImpl { protected: - void resolveX86_64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, + void resolveX86_64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); - void resolveX86Relocation(uint8_t *LocalAddress, - uint32_t FinalAddress, + void resolveX86Relocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); - void resolveARMRelocation(uint8_t *LocalAddress, - uint32_t FinalAddress, + void resolveARMRelocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); - void resolveMIPSRelocation(uint8_t *LocalAddress, - uint32_t FinalAddress, + void resolveMIPSRelocation(const SectionEntry &Section, + uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); - void resolvePPC64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, + void resolvePPC64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); - virtual void resolveRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, + virtual void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); @@ -64,6 +74,8 @@ protected: const SymbolTableMap &Symbols, StubMap &Stubs); + unsigned getCommonSymbolAlignment(const SymbolRef &Sym); + virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); uint64_t findPPC64TOC() const; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 45633e735c..829fd6c4c9 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -140,8 +140,10 @@ protected: typedef StringMap<SymbolLoc> SymbolTableMap; SymbolTableMap GlobalSymbolTable; - // Keep a map of common symbols to their sizes - typedef std::map<SymbolRef, unsigned> CommonSymbolMap; + // Pair representing the size and alignment requirement for a common symbol. 
+ typedef std::pair<unsigned, unsigned> CommonSymbolInfo; + // Keep a map of common symbols to their info pairs + typedef std::map<SymbolRef, CommonSymbolInfo> CommonSymbolMap; // For each symbol, keep a list of relocations based on it. Anytime // its address is reassigned (the JIT re-compiled the function, e.g.), @@ -192,6 +194,13 @@ protected: return (uint8_t*)Sections[SectionID].Address; } + // Subclasses can override this method to get the alignment requirement of + // a common symbol. Returns no alignment requirement if not implemented. + virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) { + return 0; + } + + void writeInt16BE(uint8_t *Addr, uint16_t Value) { if (sys::isLittleEndianHost()) Value = sys::SwapByteOrder(Value); @@ -263,16 +272,14 @@ protected: void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value); /// \brief An object file specific relocation resolver - /// \param LocalAddress The address to apply the relocation action - /// \param FinalAddress If the linker prepare code for remote executon then - /// FinalAddress has the remote address to apply the - /// relocation action, otherwise is same as LocalAddress + /// \param Section The section where the relocation is being applied + /// \param Offset The offset into the section for this relocation /// \param Value Target symbol address to apply the relocation action /// \param Type object file specific relocation type /// \param Addend A constant addend used to compute the value to be stored /// into the relocatable field - virtual void resolveRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, + virtual void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) = 0; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 56540c23da..987c0c3afc 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -21,11 +21,13 @@ using namespace llvm::object; namespace llvm { -void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, +void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { + uint8_t *LocalAddress = Section.Address + Offset; + uint64_t FinalAddress = Section.LoadAddress + Offset; bool isPCRel = (Type >> 24) & 1; unsigned MachoType = (Type >> 28) & 0xf; unsigned Size = 1 << ((Type >> 25) & 3); @@ -211,7 +213,6 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); RelocationValueRef Value; SectionEntry &Section = Sections[Rel.SectionID]; - uint8_t *Target = Section.Address + Rel.Offset; bool isExtern = (RelType >> 27) & 1; if (isExtern) { @@ -265,7 +266,7 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, // Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) - resolveRelocation(Target, (uint64_t)Target, + resolveRelocation(Section, Rel.Offset, (uint64_t)Section.Address + i->second, RelType, 0); else { @@ -279,7 +280,7 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Target, (uint64_t)Target, + resolveRelocation(Section, Rel.Offset, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index ef56f551fc..fe3539dff6 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -55,8 +55,8 @@ protected: StubMap &Stubs); public: - virtual void resolveRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, + virtual void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 1226f1a2e3..eed7a771b9 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -366,8 +366,9 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, instName = OpcodeToken.getString(); instLoc = OpcodeToken.getLoc(); + ParseInstructionInfo Info; if (NextToken.isNot(AsmToken::Eof) && - TargetParser->ParseInstruction(instName, instLoc, operands)) + TargetParser->ParseInstruction(Info, instName, instLoc, operands)) ret = -1; } else { ret = -1; } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index d4ab2f07e8..051c7dd153 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -86,7 +86,7 @@ public: MemoryBuffer *I); }; -struct AsmRewrite; +//struct AsmRewrite; struct ParseStatementInfo { /// ParsedOperands - The parsed operands from the last parsed statement. SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; @@ -1383,8 +1383,9 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { for (unsigned i = 0, e = IDVal.size(); i != e; ++i) OpcodeStr.push_back(tolower(IDVal[i])); - bool HadError = getTargetParser().ParseInstruction(OpcodeStr.str(), IDLoc, - Info.ParsedOperands); + ParseInstructionInfo IInfo(Info.AsmRewrites); + bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr.str(), + IDLoc, Info.ParsedOperands); // Dump the parsed representation, if requested. if (getShowParsedOperands()) { @@ -1406,9 +1407,26 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // the instruction. if (!HadError && getContext().getGenDwarfForAssembly() && getContext().getGenDwarfSection() == getStreamer().getCurrentSection() ) { + + unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); + + // If we previously parsed a cpp hash file line comment, make sure the + // current Dwarf File is for the CppHashFilename; if not, emit the Dwarf + // File table entry for it and adjust the line number for the .loc.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles = + getContext().getMCDwarfFiles(); + if (CppHashFilename.size() != 0) { + if(MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != + CppHashFilename) + getStreamer().EmitDwarfFileDirective( + getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename); + + unsigned CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc, CurBuffer); + Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo); + } + getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(), - SrcMgr.FindLineNumber(IDLoc, CurBuffer), - 0, DWARF2_LINE_DEFAULT_IS_STMT ? + Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0, StringRef()); } @@ -3645,27 +3663,6 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { return false; } -namespace { -enum AsmRewriteKind { - AOK_Imm, - AOK_Input, - AOK_Output, - AOK_SizeDirective, - AOK_Emit, - AOK_Skip -}; - -struct AsmRewrite { - AsmRewriteKind Kind; - SMLoc Loc; - unsigned Len; - unsigned Size; -public: - AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, unsigned size = 0) - : Kind(kind), Loc(loc), Len(len), Size(size) { } -}; -} - bool AsmParser::ParseDirectiveEmit(SMLoc IDLoc, ParseStatementInfo &Info) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); @@ -3720,9 +3717,9 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Immediate. if (Operand->isImm()) { - AsmStrRewrites.push_back(AsmRewrite(AOK_Imm, - Operand->getStartLoc(), - Operand->getNameLen())); + if (Operand->needAsmRewrite()) + AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix, + Operand->getStartLoc())); continue; } @@ -3747,14 +3744,9 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, bool isOutput = (i == 1) && Desc.mayStore(); if (!Operand->isOffsetOf() && Operand->needSizeDirective()) AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, - Operand->getStartLoc(), 0, + Operand->getStartLoc(), + /*Len*/0, Operand->getMemSize())); - - // Don't emit the offset directive. - if (Operand->isOffsetOf()) - AsmStrRewrites.push_back(AsmRewrite(AOK_Skip, - Operand->getOffsetOfLoc(), 7)); - if (isOutput) { std::string Constraint = "="; ++InputIdx; @@ -3831,7 +3823,11 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, switch (Kind) { default: break; case AOK_Imm: - OS << Twine("$$") + StringRef(Loc, (*I).Len); + OS << Twine("$$"); + OS << (*I).Val; + break; + case AOK_ImmPrefix: + OS << Twine("$$"); break; case AOK_Input: OS << '$'; @@ -3842,7 +3838,7 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, OS << OutputIdx++; break; case AOK_SizeDirective: - switch((*I).Size) { + switch((*I).Val) { default: break; case 8: OS << "byte ptr "; break; case 16: OS << "word ptr "; break; @@ -3856,6 +3852,9 @@ bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString, case AOK_Emit: OS << ".byte"; break; + case AOK_DotOperator: + OS << (*I).Val; + break; } // Skip the original expression. 
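The switch above is the final pass of ParseMSInlineAsm: each recorded AsmRewrite names a span of the original Microsoft-style string and the AT&T-style text to splice in its place, and the trailing loop skips the original expression. Below is a minimal, self-contained sketch of that pass under stated assumptions: a simplified AsmRewrite with plain string offsets standing in for SMLoc, and only three of the AOK_* kinds modeled.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

enum AsmRewriteKind { AOK_ImmPrefix, AOK_Input, AOK_SizeDirective };

struct AsmRewrite {        // simplified stand-in for the real struct
  AsmRewriteKind Kind;
  std::size_t Loc;         // offset of the rewritten span in the input
  std::size_t Len;         // length of the original span to skip
  unsigned Val;            // payload: operand index or memory size in bits
};

// Walk the rewrites in source order, copying untouched text and splicing
// in the GAS-style replacement for each span.
static std::string rewriteMSAsm(const std::string &Asm,
                                const std::vector<AsmRewrite> &Rewrites) {
  std::string Out;
  std::size_t Cursor = 0;
  for (const AsmRewrite &AR : Rewrites) {
    Out.append(Asm, Cursor, AR.Loc - Cursor);   // copy text before the span
    switch (AR.Kind) {
    case AOK_ImmPrefix:
      Out += "$$";                              // escape a literal immediate
      break;
    case AOK_Input:
      Out += "$" + std::to_string(AR.Val);      // reference input operand N
      break;
    case AOK_SizeDirective:
      switch (AR.Val) {                         // memory operand size in bits
      case 8:  Out += "byte ptr ";  break;
      case 16: Out += "word ptr ";  break;
      case 32: Out += "dword ptr "; break;
      case 64: Out += "qword ptr "; break;
      }
      break;
    }
    Cursor = AR.Loc + AR.Len;                   // skip the original expression
  }
  Out.append(Asm, Cursor, std::string::npos);
  return Out;
}

int main() {
  // Insert a size directive in front of the memory operand:
  std::string Asm = "mov eax, [ebx + 4]";
  std::vector<AsmRewrite> Rewrites = {{AOK_SizeDirective, 9, 0, 32}};
  std::cout << rewriteMSAsm(Asm, Rewrites) << "\n";
  // Prints: mov eax, dword ptr [ebx + 4]
}

Emitting the rewrites in source order during parsing is what lets a single forward pass over the string suffice.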
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 064689c3f3..0b7ee34c09 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -288,6 +288,11 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb, return object_error::success; } +error_code COFFObjectFile::getSymbolValue(DataRefImpl Symb, + uint64_t &Val) const { + report_fatal_error("getSymbolValue unimplemented in COFFObjectFile"); +} + error_code COFFObjectFile::getSectionNext(DataRefImpl Sec, SectionRef &Result) const { const coff_section *sec = toSec(Sec); diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 55bac7cbdf..45aeaac6b8 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -363,6 +363,10 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, return object_error::success; } +error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb, + uint64_t &Val) const { + report_fatal_error("getSymbolValue unimplemented in MachOObjectFile"); +} symbol_iterator MachOObjectFile::begin_symbols() const { // DRI.d.a = segment number; DRI.d.b = symbol index. diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index d07a3c9e7f..43c68f4d1d 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -46,22 +46,27 @@ namespace llvm { /* Number of bits in the significand. This includes the integer bit. */ unsigned int precision; - - /* True if arithmetic is supported. */ - unsigned int arithmeticOK; }; - const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true }; - const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; - const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; - const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; - const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; - const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; - - // The PowerPC format consists of two doubles. It does not map cleanly - // onto the usual format above. For now only storage of constants of - // this type is supported, no arithmetic. - const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; + const fltSemantics APFloat::Bogus = { 0, 0, 0 }; + + /* The PowerPC format consists of two doubles. It does not map cleanly + onto the usual format above. It is approximated using twice the + mantissa bits. Note that for exponents near the double minimum, + we no longer can represent the full 106 mantissa bits, so those + will be treated as denormal numbers. + + FIXME: While this approximation is equivalent to what GCC uses for + compile-time arithmetic on PPC double-double numbers, it is not able + to represent all possible values held by a PPC double-double number, + for example: (long double) 1.0 + (long double) 0x1p-106 + Should this be replaced by a full emulation of PPC double-double? 
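(To make that example concrete: 1.0 + 0x1p-106 needs a significand spanning 2^0 down to 2^-106, i.e. 107 bits, one more than the 53 + 53 = 106 bits this approximation provides, whereas a real PPC double-double holds it exactly as the pair (1.0, 0x1p-106).)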
*/ + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; /* A tight upper bound on number of parts required to hold the value pow(5, power) is @@ -116,12 +121,6 @@ hexDigitValue(unsigned int c) return -1U; } -static inline void -assertArithmeticOK(const llvm::fltSemantics &semantics) { - assert(semantics.arithmeticOK && - "Compile-time arithmetic does not support these semantics"); -} - /* Return the value of a decimal exponent of the form [+-]ddddddd. @@ -612,8 +611,6 @@ APFloat::assign(const APFloat &rhs) sign = rhs.sign; category = rhs.category; exponent = rhs.exponent; - sign2 = rhs.sign2; - exponent2 = rhs.exponent2; if (category == fcNormal || category == fcNaN) copySignificand(rhs); } @@ -707,16 +704,10 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { category != rhs.category || sign != rhs.sign) return false; - if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && - sign2 != rhs.sign2) - return false; if (category==fcZero || category==fcInfinity) return true; else if (category==fcNormal && exponent!=rhs.exponent) return false; - else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && - exponent2!=rhs.exponent2) - return false; else { int i= partCount(); const integerPart* p=significandParts(); @@ -729,9 +720,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { } } -APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) - : exponent2(0), sign2(0) { - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { initialize(&ourSemantics); sign = 0; zeroSignificand(); @@ -740,24 +729,19 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } -APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) { - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics) { initialize(&ourSemantics); category = fcZero; sign = false; } -APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) - : exponent2(0), sign2(0) { - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { // Allocates storage if necessary but does not initialize it. initialize(&ourSemantics); } APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative) - : exponent2(0), sign2(0) { - assertArithmeticOK(ourSemantics); + fltCategory ourCategory, bool negative) { initialize(&ourSemantics); category = ourCategory; sign = negative; @@ -767,14 +751,12 @@ APFloat::APFloat(const fltSemantics &ourSemantics, makeNaN(); } -APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) - : exponent2(0), sign2(0) { - assertArithmeticOK(ourSemantics); +APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven); } -APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) { +APFloat::APFloat(const APFloat &rhs) { initialize(rhs.semantics); assign(rhs); } @@ -1561,8 +1543,6 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, { opStatus fs; - assertArithmeticOK(*semantics); - fs = addOrSubtractSpecials(rhs, subtract); /* This return code means it was not a simple case. 
*/ @@ -1607,7 +1587,6 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = multiplySpecials(rhs); @@ -1627,7 +1606,6 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); sign ^= rhs.sign; fs = divideSpecials(rhs); @@ -1649,7 +1627,6 @@ APFloat::remainder(const APFloat &rhs) APFloat V = *this; unsigned int origSign = sign; - assertArithmeticOK(*semantics); fs = V.divide(rhs, rmNearestTiesToEven); if (fs == opDivByZero) return fs; @@ -1684,7 +1661,6 @@ APFloat::opStatus APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); fs = modSpecials(rhs); if (category == fcNormal && rhs.category == fcNormal) { @@ -1728,8 +1704,6 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, { opStatus fs; - assertArithmeticOK(*semantics); - /* Post-multiplication sign, before addition. */ sign ^= multiplicand.sign; @@ -1770,7 +1744,6 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, /* Rounding-mode correct round to integral value. */ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { opStatus fs; - assertArithmeticOK(*semantics); // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate @@ -1817,7 +1790,6 @@ APFloat::compare(const APFloat &rhs) const { cmpResult result; - assertArithmeticOK(*semantics); assert(semantics == rhs.semantics); switch (convolve(category, rhs.category)) { @@ -1902,8 +1874,6 @@ APFloat::convert(const fltSemantics &toSemantics, int shift; const fltSemantics &fromSemantics = *semantics; - assertArithmeticOK(fromSemantics); - assertArithmeticOK(toSemantics); lostFraction = lfExactlyZero; newPartCount = partCountForBits(toSemantics.precision + 1); oldPartCount = partCount(); @@ -1988,8 +1958,6 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width, const integerPart *src; unsigned int dstPartsCount, truncatedBits; - assertArithmeticOK(*semantics); - *isExact = false; /* Handle the three special cases first. */ @@ -2151,7 +2119,6 @@ APFloat::convertFromUnsignedParts(const integerPart *src, integerPart *dst; lostFraction lost_fraction; - assertArithmeticOK(*semantics); category = fcNormal; omsb = APInt::tcMSB(src, srcCount) + 1; dst = significandParts(); @@ -2202,7 +2169,6 @@ APFloat::convertFromSignExtendedInteger(const integerPart *src, { opStatus status; - assertArithmeticOK(*semantics); if (isSigned && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) { integerPart *copy; @@ -2336,7 +2302,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, roundingMode rounding_mode) { unsigned int parts, pow5PartCount; - fltSemantics calcSemantics = { 32767, -32767, 0, true }; + fltSemantics calcSemantics = { 32767, -32767, 0 }; integerPart pow5Parts[maxPowerOfFiveParts]; bool isNearest; @@ -2528,7 +2494,6 @@ APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) APFloat::opStatus APFloat::convertFromString(StringRef str, roundingMode rounding_mode) { - assertArithmeticOK(*semantics); assert(!str.empty() && "Invalid string length"); /* Handle a leading minus sign.
*/ @@ -2580,8 +2545,6 @@ APFloat::convertToHexString(char *dst, unsigned int hexDigits, { char *p; - assertArithmeticOK(*semantics); - p = dst; if (sign) *dst++ = '-'; @@ -2790,42 +2753,46 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble); assert(partCount()==2); - uint64_t myexponent, mysignificand, myexponent2, mysignificand2; - - if (category==fcNormal) { - myexponent = exponent + 1023; //bias - myexponent2 = exponent2 + 1023; - mysignificand = significandParts()[0]; - mysignificand2 = significandParts()[1]; - if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) - myexponent = 0; // denormal - if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL)) - myexponent2 = 0; // denormal - } else if (category==fcZero) { - myexponent = 0; - mysignificand = 0; - myexponent2 = 0; - mysignificand2 = 0; - } else if (category==fcInfinity) { - myexponent = 0x7ff; - myexponent2 = 0; - mysignificand = 0; - mysignificand2 = 0; + uint64_t words[2]; + opStatus fs; + bool losesInfo; + + // Convert number to double. To avoid spurious underflows, we re- + // normalize against the "double" minExponent first, and only *then* + // truncate the mantissa. The result of that second conversion + // may be inexact, but should never underflow. + APFloat extended(*this); + fltSemantics extendedSemantics = *semantics; + extendedSemantics.minExponent = IEEEdouble.minExponent; + fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + APFloat u(extended); + fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK || fs == opInexact); + (void)fs; + words[0] = *u.convertDoubleAPFloatToAPInt().getRawData(); + + // If conversion was exact or resulted in a special case, we're done; + // just set the second double to zero. Otherwise, re-convert back to + // the extended format and compute the difference. This now should + // convert exactly to double. + if (u.category == fcNormal && losesInfo) { + fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + APFloat v(extended); + v.subtract(u, rmNearestTiesToEven); + fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + words[1] = *v.convertDoubleAPFloatToAPInt().getRawData(); } else { - assert(category == fcNaN && "Unknown category"); - myexponent = 0x7ff; - mysignificand = significandParts()[0]; - myexponent2 = exponent2; - mysignificand2 = significandParts()[1]; + words[1] = 0; } - uint64_t words[2]; - words[0] = ((uint64_t)(sign & 1) << 63) | - ((myexponent & 0x7ff) << 52) | - (mysignificand & 0xfffffffffffffLL); - words[1] = ((uint64_t)(sign2 & 1) << 63) | - ((myexponent2 & 0x7ff) << 52) | - (mysignificand2 & 0xfffffffffffffLL); return APInt(128, words); } @@ -3045,47 +3012,23 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) assert(api.getBitWidth()==128); uint64_t i1 = api.getRawData()[0]; uint64_t i2 = api.getRawData()[1]; - uint64_t myexponent = (i1 >> 52) & 0x7ff; - uint64_t mysignificand = i1 & 0xfffffffffffffLL; - uint64_t myexponent2 = (i2 >> 52) & 0x7ff; - uint64_t mysignificand2 = i2 & 0xfffffffffffffLL; + opStatus fs; + bool losesInfo; - initialize(&APFloat::PPCDoubleDouble); - assert(partCount()==2); + // Get the first double and convert to our format. 
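// The conversions above and below follow the usual double-double identity,
// sketched here with plain doubles (hypothetical illustration, not code
// from this file):
//   double hi = (double)x;        // nearest double to the full value
//   double lo = (double)(x - hi); // residual, itself exactly representable
// so that x ~= hi + lo, which is what the add() below recomputes.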
+ initFromDoubleAPInt(APInt(64, i1)); + fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; - sign = static_cast<unsigned int>(i1>>63); - sign2 = static_cast<unsigned int>(i2>>63); - if (myexponent==0 && mysignificand==0) { - // exponent, significand meaningless - // exponent2 and significand2 are required to be 0; we don't check - category = fcZero; - } else if (myexponent==0x7ff && mysignificand==0) { - // exponent, significand meaningless - // exponent2 and significand2 are required to be 0; we don't check - category = fcInfinity; - } else if (myexponent==0x7ff && mysignificand!=0) { - // exponent meaningless. So is the whole second word, but keep it - // for determinism. - category = fcNaN; - exponent2 = myexponent2; - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - } else { - category = fcNormal; - // Note there is no category2; the second word is treated as if it is - // fcNormal, although it might be something else considered by itself. - exponent = myexponent - 1023; - exponent2 = myexponent2 - 1023; - significandParts()[0] = mysignificand; - significandParts()[1] = mysignificand2; - if (myexponent==0) // denormal - exponent = -1022; - else - significandParts()[0] |= 0x10000000000000LL; // integer bit - if (myexponent2==0) - exponent2 = -1022; - else - significandParts()[1] |= 0x10000000000000LL; // integer bit + // Unless we have a special case, add in second double. + if (category == fcNormal) { + APFloat v(APInt(64, i2)); + fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); + assert(fs == opOK && !losesInfo); + (void)fs; + + add(v, rmNearestTiesToEven); } } @@ -3311,15 +3254,15 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { return Val; } -APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) { +APFloat::APFloat(const APInt& api, bool isIEEE) { initFromAPInt(api, isIEEE); } -APFloat::APFloat(float f) : exponent2(0), sign2(0) { +APFloat::APFloat(float f) { initFromAPInt(APInt::floatToBits(f)); } -APFloat::APFloat(double d) : exponent2(0), sign2(0) { +APFloat::APFloat(double d) { initFromAPInt(APInt::doubleToBits(d)); } diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 3001f6c468..9559ad7295 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -21,11 +21,15 @@ using namespace llvm; #undef MemoryFence #endif +#if defined(__GNUC__) || (defined(__IBMCPP__) && __IBMCPP__ >= 1210) +#define GNU_ATOMICS +#endif + void sys::MemoryFence() { #if LLVM_HAS_ATOMICS == 0 return; #else -# if defined(__GNUC__) +# if defined(GNU_ATOMICS) __sync_synchronize(); # elif defined(_MSC_VER) MemoryBarrier(); @@ -43,7 +47,7 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, if (result == old_value) *ptr = new_value; return result; -#elif defined(__GNUC__) +#elif defined(GNU_ATOMICS) return __sync_val_compare_and_swap(ptr, old_value, new_value); #elif defined(_MSC_VER) return InterlockedCompareExchange(ptr, new_value, old_value); @@ -56,7 +60,7 @@ sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { #if LLVM_HAS_ATOMICS == 0 ++(*ptr); return *ptr; -#elif defined(__GNUC__) +#elif defined(GNU_ATOMICS) return __sync_add_and_fetch(ptr, 1); #elif defined(_MSC_VER) return InterlockedIncrement(ptr); @@ -69,7 +73,7 @@ sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { #if LLVM_HAS_ATOMICS == 0 --(*ptr); return *ptr; -#elif defined(__GNUC__) +#elif defined(GNU_ATOMICS) return 
__sync_sub_and_fetch(ptr, 1); #elif defined(_MSC_VER) return InterlockedDecrement(ptr); @@ -82,7 +86,7 @@ sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) { #if LLVM_HAS_ATOMICS == 0 *ptr += val; return *ptr; -#elif defined(__GNUC__) +#elif defined(GNU_ATOMICS) return __sync_add_and_fetch(ptr, val); #elif defined(_MSC_VER) return InterlockedExchangeAdd(ptr, val) + val; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 83baf60d04..6af0f4a6c9 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -1,9 +1,3 @@ -## FIXME: This only requires RTTI because tblgen uses it. Fix that. -set(LLVM_REQUIRES_RTTI 1) -if( MINGW ) - set(LLVM_REQUIRES_EH 1) -endif() - add_llvm_library(LLVMSupport APFloat.cpp APInt.cpp diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index 00be43b750..730220f47d 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -53,8 +53,10 @@ std::string StrError(int errnum) { str = buffer; # endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" - if (errnum) + if (errnum) { strerror_s(buffer, MaxErrStrLen - 1, errnum); + str = buffer; + } #elif defined(HAVE_STRERROR) // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 9ee3f2db92..34e32b817b 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -503,6 +503,7 @@ std::string sys::getHostCPUName() { .Case("0xb76", "arm1176jz-s") .Case("0xc08", "cortex-a8") .Case("0xc09", "cortex-a9") + .Case("0xc0f", "cortex-a15") .Case("0xc20", "cortex-m0") .Case("0xc23", "cortex-m3") .Case("0xc24", "cortex-m4") diff --git a/lib/Support/Makefile b/lib/Support/Makefile index d68e500ca5..4a2185d589 100644 --- a/lib/Support/Makefile +++ b/lib/Support/Makefile @@ -11,9 +11,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMSupport BUILD_ARCHIVE = 1 -## FIXME: This only requires RTTI because tblgen uses it. Fix that. -REQUIRES_RTTI = 1 - EXTRA_DIST = Unix Win32 README.txt include $(LEVEL)/Makefile.common diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 2cc7a58462..c59ec19ecb 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -180,38 +180,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Default(UnknownArch); } -Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { - // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for - // archs which Darwin doesn't use. - - // The matching this routine does is fairly pointless, since it is neither the - // complete architecture list, nor a reasonable subset. The problem is that - // historically the driver driver accepts this and also ties its -march= - // handling to the architecture name, so we need to be careful before removing - // support for it. - - // This code must be kept in sync with Clang's Darwin specific argument - // translation. - - return StringSwitch<ArchType>(Str) - .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", Triple::ppc) - .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", Triple::ppc) - .Case("ppc64", Triple::ppc64) - .Cases("i386", "i486", "i486SX", "i586", "i686", Triple::x86) - .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4", - Triple::x86) - .Case("x86_64", Triple::x86_64) - // This is derived from the driver driver. 
- .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm) - .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm) - .Case("r600", Triple::r600) - .Case("nvptx", Triple::nvptx) - .Case("nvptx64", Triple::nvptx64) - .Case("amdil", Triple::amdil) - .Case("spir", Triple::spir) - .Default(Triple::UnknownArch); -} - // Returns architecture name that is understood by the target assembler. const char *Triple::getArchNameForAssembler() { if (!isOSDarwin() && getVendor() != Triple::Apple) diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 4f64eb4ff2..935d674a36 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LLVM_REQUIRES_EH 1) - add_llvm_library(LLVMTableGen Error.cpp Main.cpp diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index ad98fba9ba..0bb86b0686 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -45,10 +45,6 @@ void PrintWarning(const Twine &Msg) { errs() << "warning:" << Msg << "\n"; } -void PrintWarning(const TGError &Warning) { - PrintWarning(Warning.getLoc(), Warning.getMessage()); -} - void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg) { PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } @@ -61,10 +57,6 @@ void PrintError(const Twine &Msg) { errs() << "error:" << Msg << "\n"; } -void PrintError(const TGError &Error) { - PrintError(Error.getLoc(), Error.getMessage()); -} - void PrintFatalError(const std::string &Msg) { PrintError(Twine(Msg)); std::exit(1); diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index d87d175502..d0ca756016 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -80,56 +80,46 @@ namespace llvm { int TableGenMain(char *argv0, TableGenMainFn *MainFn) { RecordKeeper Records; - try { - // Parse the input file. - OwningPtr<MemoryBuffer> File; - if (error_code ec = - MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { - errs() << "Could not open input file '" << InputFilename << "': " - << ec.message() <<"\n"; - return 1; - } - MemoryBuffer *F = File.take(); - - // Tell SrcMgr about this buffer, which is what TGParser will pick up. - SrcMgr.AddNewSourceBuffer(F, SMLoc()); - - // Record the location of the include directory so that the lexer can find - // it later. - SrcMgr.setIncludeDirs(IncludeDirs); - - TGParser Parser(SrcMgr, Records); - - if (Parser.ParseFile()) - return 1; - - std::string Error; - tool_output_file Out(OutputFilename.c_str(), Error); - if (!Error.empty()) { - errs() << argv0 << ": error opening " << OutputFilename - << ":" << Error << "\n"; - return 1; - } - if (!DependFilename.empty()) - if (int Ret = createDependencyFile(Parser, argv0)) - return Ret; - - if (MainFn(Out.os(), Records)) - return 1; - - // Declare success. - Out.keep(); - return 0; - - } catch (const TGError &Error) { - PrintError(Error); - } catch (const std::string &Error) { - PrintError(Error); - } catch (const char *Error) { - PrintError(Error); - } catch (...) { - errs() << argv0 << ": Unknown unexpected exception occurred.\n"; + // Parse the input file. + OwningPtr<MemoryBuffer> File; + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { + errs() << "Could not open input file '" << InputFilename << "': " + << ec.message() <<"\n"; + return 1; } + MemoryBuffer *F = File.take(); + + // Tell SrcMgr about this buffer, which is what TGParser will pick up. + SrcMgr.AddNewSourceBuffer(F, SMLoc()); + + // Record the location of the include directory so that the lexer can find + // it later. 
+ SrcMgr.setIncludeDirs(IncludeDirs); + + TGParser Parser(SrcMgr, Records); + + if (Parser.ParseFile()) + return 1; + + std::string Error; + tool_output_file Out(OutputFilename.c_str(), Error); + if (!Error.empty()) { + errs() << argv0 << ": error opening " << OutputFilename + << ":" << Error << "\n"; + return 1; + } + if (!DependFilename.empty()) { + if (int Ret = createDependencyFile(Parser, argv0)) + return Ret; + } + + if (MainFn(Out.os(), Records)) + return 1; + + // Declare success. + Out.keep(); + return 0; return 1; } diff --git a/lib/TableGen/Makefile b/lib/TableGen/Makefile index 732d8a197e..345db3465c 100644 --- a/lib/TableGen/Makefile +++ b/lib/TableGen/Makefile @@ -11,6 +11,4 @@ LEVEL = ../.. LIBRARYNAME = LLVMTableGen BUILD_ARCHIVE = 1 -REQUIRES_EH = 1 - include $(LEVEL)/Makefile.common diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c7b2de2b0f..11feb43542 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -616,7 +616,8 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const { Record *ListInit::getElementAsRecord(unsigned i) const { assert(i < Values.size() && "List element index out of range!"); DefInit *DI = dyn_cast<DefInit>(Values[i]); - if (DI == 0) throw "Expected record in list!"; + if (DI == 0) + PrintFatalError("Expected record in list!"); return DI->getDef(); } @@ -725,7 +726,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { if (CurRec) { if (const RecordVal *RV = CurRec->getValue(Name)) { if (RV->getType() != getType()) - throw "type mismatch in cast"; + PrintFatalError("type mismatch in cast"); return VarInit::get(Name, RV->getType()); } @@ -737,7 +738,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { assert(RV && "Template arg doesn't exist??"); if (RV->getType() != getType()) - throw "type mismatch in cast"; + PrintFatalError("type mismatch in cast"); return VarInit::get(TemplateArgName, RV->getType()); } @@ -751,7 +752,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { assert(RV && "Template arg doesn't exist??"); if (RV->getType() != getType()) - throw "type mismatch in cast"; + PrintFatalError("type mismatch in cast"); return VarInit::get(MCName, RV->getType()); } @@ -760,7 +761,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { if (Record *D = (CurRec->getRecords()).getDef(Name)) return DefInit::get(D); - throw TGError(CurRec->getLoc(), "Undefined reference:'" + Name + "'\n"); + PrintFatalError(CurRec->getLoc(), + "Undefined reference:'" + Name + "'\n"); } } break; @@ -860,7 +862,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator()); DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator()); if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef()) - throw "Concated Dag operators do not match!"; + PrintFatalError("Concatenated Dag operators do not match!"); std::vector<Init*> Args; std::vector<std::string> ArgNames; for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) { @@ -1027,14 +1029,13 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, OpInit *RHSo = dyn_cast<OpInit>(RHS); if (!RHSo) { - throw TGError(CurRec->getLoc(), "!foreach requires an operator\n"); + PrintFatalError(CurRec->getLoc(), "!foreach requires an operator\n"); } TypedInit *LHSt = dyn_cast<TypedInit>(LHS); - if (!LHSt) { - throw TGError(CurRec->getLoc(), "!foreach requires typed variable\n"); - } + if (!LHSt) +
PrintFatalError(CurRec->getLoc(), "!foreach requires typed variable\n"); if ((MHSd && isa<DagRecTy>(Type)) || (MHSl && isa<ListRecTy>(Type))) { if (MHSd) { @@ -1632,7 +1633,7 @@ void Record::checkName() { assert(TypedName && "Record name is not typed!"); RecTy *Type = TypedName->getType(); if (!isa<StringRecTy>(Type)) - throw TGError(getLoc(), "Record name is not a string!"); + PrintFatalError(getLoc(), "Record name is not a string!"); } DefInit *Record::getDefInit() { @@ -1683,7 +1684,7 @@ void Record::resolveReferencesTo(const RecordVal *RV) { continue; if (Init *V = Values[i].getValue()) if (Values[i].setValue(V->resolveReferences(*this, RV))) - throw TGError(getLoc(), "Invalid value is found when setting '" + PrintFatalError(getLoc(), "Invalid value is found when setting '" + Values[i].getNameInitAsString() + "' after resolving references" + (RV ? " against '" + RV->getNameInitAsString() @@ -1738,68 +1739,68 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { } /// getValueInit - Return the initializer for a value with the specified name, -/// or throw an exception if the field does not exist. +/// or abort if the field does not exist. /// Init *Record::getValueInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); return R->getValue(); } /// getValueAsString - This method looks up the specified field and returns its -/// value as a string, throwing an exception if the field does not exist or if +/// value as a string, aborting if the field does not exist or if /// the value is not a string. /// std::string Record::getValueAsString(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (StringInit *SI = dyn_cast<StringInit>(R->getValue())) return SI->getValue(); - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a string initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a string initializer!"); } /// getValueAsBitsInit - This method looks up the specified field and returns -/// its value as a BitsInit, throwing an exception if the field does not exist -/// or if the value is not the right type. +/// its value as a BitsInit, aborting if the field does not exist or if +/// the value is not the right type.
/// BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue())) return BI; - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a BitsInit initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a BitsInit initializer!"); } /// getValueAsListInit - This method looks up the specified field and returns -/// its value as a ListInit, throwing an exception if the field does not exist -/// or if the value is not the right type. +/// its value as a ListInit, aborting if the field does not exist or if +/// the value is not the right type. /// ListInit *Record::getValueAsListInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (ListInit *LI = dyn_cast<ListInit>(R->getValue())) return LI; - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a list initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a list initializer!"); } /// getValueAsListOfDefs - This method looks up the specified field and returns -/// its value as a vector of records, throwing an exception if the field does -/// not exist or if the value is not the right type. +/// its value as a vector of records, aborting if the field does not exist +/// or if the value is not the right type. /// std::vector<Record*> Record::getValueAsListOfDefs(StringRef FieldName) const { @@ -1809,32 +1810,32 @@ Record::getValueAsListOfDefs(StringRef FieldName) const { if (DefInit *DI = dyn_cast<DefInit>(List->getElement(i))) { Defs.push_back(DI->getDef()); } else { - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' list is not entirely DefInit!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' list is not entirely DefInit!"); } } return Defs; } /// getValueAsInt - This method looks up the specified field and returns its -/// value as an int64_t, throwing an exception if the field does not exist or if -/// the value is not the right type. +/// value as an int64_t, aborting if the field does not exist or if the value +/// is not the right type. 
/// int64_t Record::getValueAsInt(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (IntInit *II = dyn_cast<IntInit>(R->getValue())) return II->getValue(); - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have an int initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have an int initializer!"); } /// getValueAsListOfInts - This method looks up the specified field and returns -/// its value as a vector of integers, throwing an exception if the field does -/// not exist or if the value is not the right type. +/// its value as a vector of integers, aborting if the field does not exist or +/// if the value is not the right type. /// std::vector<int64_t> Record::getValueAsListOfInts(StringRef FieldName) const { @@ -1844,16 +1845,16 @@ Record::getValueAsListOfInts(StringRef FieldName) const { if (IntInit *II = dyn_cast<IntInit>(List->getElement(i))) { Ints.push_back(II->getValue()); } else { - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a list of ints initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a list of ints initializer!"); } } return Ints; } /// getValueAsListOfStrings - This method looks up the specified field and -/// returns its value as a vector of strings, throwing an exception if the -/// field does not exist or if the value is not the right type. +/// returns its value as a vector of strings, aborting if the field does not +/// exist or if the value is not the right type. /// std::vector<std::string> Record::getValueAsListOfStrings(StringRef FieldName) const { @@ -1863,50 +1864,50 @@ Record::getValueAsListOfStrings(StringRef FieldName) const { if (StringInit *II = dyn_cast<StringInit>(List->getElement(i))) { Strings.push_back(II->getValue()); } else { - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a list of strings initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a list of strings initializer!"); } } return Strings; } /// getValueAsDef - This method looks up the specified field and returns its -/// value as a Record, throwing an exception if the field does not exist or if -/// the value is not the right type. +/// value as a Record, aborting if the field does not exist or if the value +/// is not the right type. 
/// Record *Record::getValueAsDef(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (DefInit *DI = dyn_cast<DefInit>(R->getValue())) return DI->getDef(); - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a def initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a def initializer!"); } /// getValueAsBit - This method looks up the specified field and returns its -/// value as a bit, throwing an exception if the field does not exist or if -/// the value is not the right type. +/// value as a bit, aborting if the field does not exist or if the value is +/// not the right type. /// bool Record::getValueAsBit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (BitInit *BI = dyn_cast<BitInit>(R->getValue())) return BI->getValue(); - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a bit initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a bit initializer!"); } bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (R->getValue() == UnsetInit::get()) { Unset = true; @@ -1915,24 +1916,24 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { Unset = false; if (BitInit *BI = dyn_cast<BitInit>(R->getValue())) return BI->getValue(); - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a bit initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a bit initializer!"); } /// getValueAsDag - This method looks up the specified field and returns its -/// value as an Dag, throwing an exception if the field does not exist or if -/// the value is not the right type. +/// value as a Dag, aborting if the field does not exist or if the value is +/// not the right type.
/// DagInit *Record::getValueAsDag(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (R == 0 || R->getValue() == 0) - throw "Record `" + getName() + "' does not have a field named `" + - FieldName.str() + "'!\n"; + PrintFatalError(getLoc(), "Record `" + getName() + + "' does not have a field named `" + FieldName.str() + "'!\n"); if (DagInit *DI = dyn_cast<DagInit>(R->getValue())) return DI; - throw "Record `" + getName() + "', field `" + FieldName.str() + - "' does not have a dag initializer!"; + PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + + FieldName.str() + "' does not have a dag initializer!"); } @@ -1975,7 +1976,7 @@ std::vector<Record*> RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const { Record *Class = getClass(ClassName); if (!Class) - throw "ERROR: Couldn't find the `" + ClassName + "' class!\n"; + PrintFatalError("ERROR: Couldn't find the `" + ClassName + "' class!\n"); std::vector<Record*> Defs; for (std::map<std::string, Record*>::const_iterator I = getDefs().begin(), diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b301d21d1d..5280abb40c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -702,6 +702,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; @@ -791,6 +793,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI); + AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI); } else llvm_unreachable("Unknown reg class!"); break; @@ -938,6 +947,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = @@ -963,6 +973,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (ARM::DPRRegClass.hasSubClassEq(RC)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { + unsigned LdmOpc = AFI->isThumbFunction() ? 
ARM::t2LDMIA : ARM::LDMIA; + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); break; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index fa1d0004d7..d28f7ff73f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -110,148 +110,12 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned i = 0; i != 16; ++i) Reserved.set(ARM::D16 + i); } - return Reserved; -} - -bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: break; - case ARM::SP: - case ARM::PC: - return true; - case ARM::R6: - if (hasBasePointer(MF)) - return true; - break; - case ARM::R7: - case ARM::R11: - if (FramePtr == Reg && TFI->hasFP(MF)) - return true; - break; - case ARM::R9: - return STI.isR9Reserved(); - } - - return false; -} - -bool -ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const { - - unsigned Size = RC->getSize() * 8; - if (Size < 6) - return 0; + const TargetRegisterClass *RC = &ARM::GPRPairRegClass; + for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) + for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) Reserved.set(*I); - NewSubIdx = 0; // Whole register. - unsigned NumRegs = SubIndices.size(); - if (NumRegs == 8) { - // 8 D registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[0] == ARM::dsub_0 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3 && - SubIndices[4] == ARM::dsub_4 && - SubIndices[5] == ARM::dsub_5 && - SubIndices[6] == ARM::dsub_6 && - SubIndices[7] == ARM::dsub_7); - } else if (NumRegs == 4) { - if (SubIndices[0] == ARM::qsub_0) { - // 4 Q registers -> 1 QQQQ register. - return (Size == 512 && - SubIndices[1] == ARM::qsub_1 && - SubIndices[2] == ARM::qsub_2 && - SubIndices[3] == ARM::qsub_3); - } else if (SubIndices[0] == ARM::dsub_0) { - // 4 D registers -> 1 QQ register. - if (Size >= 256 && - SubIndices[1] == ARM::dsub_1 && - SubIndices[2] == ARM::dsub_2 && - SubIndices[3] == ARM::dsub_3) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 4 D registers -> 1 QQ register (2nd). - if (Size == 512 && - SubIndices[1] == ARM::dsub_5 && - SubIndices[2] == ARM::dsub_6 && - SubIndices[3] == ARM::dsub_7) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 4 S registers -> 1 Q register. - if (Size >= 128 && - SubIndices[1] == ARM::ssub_1 && - SubIndices[2] == ARM::ssub_2 && - SubIndices[3] == ARM::ssub_3) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } - } else if (NumRegs == 2) { - if (SubIndices[0] == ARM::qsub_0) { - // 2 Q registers -> 1 QQ register. 
- if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { - if (Size == 512) - NewSubIdx = ARM::qqsub_0; - return true; - } - } else if (SubIndices[0] == ARM::qsub_2) { - // 2 Q registers -> 1 QQ register (2nd). - if (Size == 512 && SubIndices[1] == ARM::qsub_3) { - NewSubIdx = ARM::qqsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_0) { - // 2 D registers -> 1 Q register. - if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { - if (Size >= 256) - NewSubIdx = ARM::qsub_0; - return true; - } - } else if (SubIndices[0] == ARM::dsub_2) { - // 2 D registers -> 1 Q register (2nd). - if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { - NewSubIdx = ARM::qsub_1; - return true; - } - } else if (SubIndices[0] == ARM::dsub_4) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_5) { - NewSubIdx = ARM::qsub_2; - return true; - } - } else if (SubIndices[0] == ARM::dsub_6) { - // 2 D registers -> 1 Q register (3rd). - if (Size == 512 && SubIndices[1] == ARM::dsub_7) { - NewSubIdx = ARM::qsub_3; - return true; - } - } else if (SubIndices[0] == ARM::ssub_0) { - // 2 S registers -> 1 D register. - if (SubIndices[1] == ARM::ssub_1) { - if (Size >= 128) - NewSubIdx = ARM::dsub_0; - return true; - } - } else if (SubIndices[0] == ARM::ssub_2) { - // 2 S registers -> 1 D register (2nd). - if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { - NewSubIdx = ARM::dsub_1; - return true; - } - } - } - return false; + return Reserved; } const TargetRegisterClass* @@ -267,6 +131,7 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) case ARM::QPRRegClassID: case ARM::QQPRRegClassID: case ARM::QQQQPRRegClassID: + case ARM::GPRPairRegClassID: return Super; } Super = *I++; @@ -600,6 +465,7 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. @@ -608,10 +474,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R3: return ARM::R2; case ARM::R5: return ARM::R4; case ARM::R7: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) ? 0 : ARM::R6; - case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 :ARM::R8; + case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10; case ARM::S1: return ARM::S0; case ARM::S3: return ARM::S2; @@ -653,6 +519,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); switch (Reg) { default: break; // Return 0 if either register of the pair is a special register. @@ -661,10 +528,10 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R2: return ARM::R3; case ARM::R4: return ARM::R5; case ARM::R6: - return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) ? 0 : ARM::R7; - case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + case ARM::R8: return MRI.isReserved(ARM::R9) ? 
0 :ARM::R9; + case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11; case ARM::S0: return ARM::S1; case ARM::S2: return ARM::S3; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index da29f7e711..dbbb566900 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -99,16 +99,6 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; - /// canCombineSubRegIndices - Given a register class and a list of - /// subregister indices, return true if it's possible to combine the - /// subregister indices into one that corresponds to a larger - /// subregister. Return the new subregister index by reference. Note the - /// new index may be zero if the given subregisters can be combined to - /// form the whole register. - virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const; - const TargetRegisterClass* getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; const TargetRegisterClass* @@ -170,8 +160,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... - virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp deleted file mode 100644 index 7bca0edf91..0000000000 --- a/lib/Target/ARM/ARMELFWriterInfo.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. 
-// -//===----------------------------------------------------------------------===// - -#include "ARMELFWriterInfo.h" -#include "ARMRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ELF.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the ARMELFWriterInfo class -//===----------------------------------------------------------------------===// - -ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64, - TM.getDataLayout()->isLittleEndian()) { -} - -ARMELFWriterInfo::~ARMELFWriterInfo() {} - -unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch (MachineRelTy) { - case ARM::reloc_arm_absolute: - case ARM::reloc_arm_relative: - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_jt_base: - case ARM::reloc_arm_pic_jt: - llvm_unreachable("unsupported ARM relocation type"); - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; - default: - llvm_unreachable("unknown ARM relocation type"); - } -} - -long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " - "implemented"); -} - -unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); -} - -bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); -} - -unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} - -long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " - "implemented"); -} diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h deleted file mode 100644 index 6a84f8ac42..0000000000 --- a/lib/Target/ARM/ARMELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the ARM backend. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM_ELF_WRITER_INFO_H -#define ARM_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - class TargetMachine; - - class ARMELFWriterInfo : public TargetELFWriterInfo { - public: - ARMELFWriterInfo(TargetMachine &TM); - virtual ~ARMELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. 
- /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // ARM_ELF_WRITER_INFO_H diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e75a006097..4ef226a2a6 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1315,57 +1315,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VSETLNi8Q: - case ARM::VSETLNi16Q: { - // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting - // on the respective D register. - - unsigned QReg = MI.getOperand(1).getReg(); - unsigned QLane = MI.getOperand(3).getImm(); - - unsigned NewOpcode, DLane, DSubReg; - switch (Opcode) { - default: llvm_unreachable("Invalid opcode!"); - case ARM::VSETLNi8Q: - // 4 possible 8-bit lanes per DPR: - NewOpcode = ARM::VSETLNi8; - DLane = QLane % 8; - DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0; - break; - case ARM::VSETLNi16Q: - // 4 possible 16-bit lanes per DPR. - NewOpcode = ARM::VSETLNi16; - DLane = QLane % 4; - DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0; - break; - } - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode)); - - unsigned DReg = TRI->getSubReg(QReg, DSubReg); - - MIB.addReg(DReg, RegState::Define); // Output DPR - MIB.addReg(DReg); // Input DPR - MIB.addOperand(MI.getOperand(2)); // Input GPR - MIB.addImm(DLane); // Lane - - // Add the predicate operands. - MIB.addOperand(MI.getOperand(4)); - MIB.addOperand(MI.getOperand(5)); - - if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register. - MIB->addRegisterKilled(QReg, TRI, true); - // And an implicit def of the output register (which should always be the - // same as the input register). 
- MIB->addRegisterDefined(QReg, TRI); - - TransferImpOps(MI, MIB, MIB); - - MI.eraseFromParent(); - return true; - } - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2379c425aa..c8ddbcfaec 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -164,7 +164,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MF.getFunction()->needsUnwindTableEntry(); // @LOCALMOD-END - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -441,7 +442,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; @@ -1291,6 +1293,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned FramePtr = RegInfo->getFrameRegister(MF); // Spill R4 if Thumb2 function requires stack realignment - it will be used as @@ -1300,12 +1303,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getVarArgsRegSaveSize() > 0) - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1315,7 +1318,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MF.getRegInfo().setPhysRegUsed(ARM::R4); + MRI.setPhysRegUsed(ARM::R4); } // See if we can spill vector registers to aligned stack. @@ -1323,7 +1326,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + MRI.setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. 
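The ARMFrameLowering hunks above and below share a single mechanical cleanup: MF.getRegInfo() is fetched once into the local MRI reference, and reserved-register checks move from RegInfo->isReservedReg(MF, Reg) to MRI.isReserved(Reg). A minimal sketch of the resulting pattern, assuming the 2012-era API used in this patch; markUsedIfSpillable is a hypothetical helper, not code from the commit:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Hypothetical helper: cache the MachineRegisterInfo reference once,
    // then query and mark physical registers through it directly.
    static void markUsedIfSpillable(MachineFunction &MF, unsigned Reg) {
      MachineRegisterInfo &MRI = MF.getRegInfo(); // fetched once, reused
      if (!MRI.isReserved(Reg))   // was RegInfo->isReservedReg(MF, Reg)
        MRI.setPhysRegUsed(Reg);
    }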
@@ -1331,7 +1334,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { + if (MRI.isPhysRegUsed(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1420,7 +1423,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); @@ -1429,7 +1432,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); + MRI.setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -1444,16 +1447,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb1 if (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; } } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - if (!RegInfo->isReservedReg(MF, Reg)) + MRI.setPhysRegUsed(Reg); + if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } } @@ -1471,7 +1474,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg) && + if (!MRI.isReserved(Reg) && (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || Reg == ARM::LR)) { Extras.push_back(Reg); @@ -1483,7 +1486,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS2GPRs.empty()) { unsigned Reg = UnspilledCS2GPRs.back(); UnspilledCS2GPRs.pop_back(); - if (!RegInfo->isReservedReg(MF, Reg)) { + if (!MRI.isReserved(Reg)) { Extras.push_back(Reg); NumExtras--; } @@ -1491,7 +1494,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); + MRI.setPhysRegUsed(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1505,7 +1508,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); + MRI.setPhysRegUsed(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 7d39704028..6cf8473f9d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1634,11 +1634,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; + bool HasMinSizeAttr = MF.getFunction()->getFnAttributes(). 
+                    hasAttribute(Attributes::MinSize);
   if (Subtarget->isThumb()) {
     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
       CallOpc = ARMISD::CALL_NOLINK;
     else if (doesNotRet && isDirect && !isARMFunc &&
-             Subtarget->hasRAS() && !Subtarget->isThumb1Only())
+             Subtarget->hasRAS() && !Subtarget->isThumb1Only() &&
+             // Emit regular call when code size is the priority
+             !HasMinSizeAttr)
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
@@ -1646,7 +1650,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   } else {
     if (!isDirect && !Subtarget->hasV5TOps()) {
       CallOpc = ARMISD::CALL_NOLINK;
-    } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+    } else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
+               // Emit regular call when code size is the priority
+               !HasMinSizeAttr)
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
@@ -4153,6 +4159,36 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Check if a VEXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // Assume that the first shuffle index is not UNDEF. Fail if it is.
+  if (M[0] < 0)
+    return false;
+
+  Imm = M[0];
+
+  // If this is a VEXT shuffle, the immediate value is the index of the first
+  // element. The other shuffle indices must be the successive elements after
+  // the first one.
+  unsigned ExpectedElt = Imm;
+  for (unsigned i = 1; i < NumElts; ++i) {
+    // Increment the expected index. If it wraps around, just follow it
+    // back to index zero and keep going.
+    ++ExpectedElt;
+    if (ExpectedElt == NumElts)
+      ExpectedElt = 0;
+
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if (ExpectedElt != static_cast<unsigned>(M[i]))
+      return false;
+  }
+
+  return true;
+}
+
 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
                        bool &ReverseVEXT, unsigned &Imm) {
@@ -4912,6 +4948,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   if (isVREVMask(ShuffleMask, VT, 16))
     return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
 
+  if (V2->getOpcode() == ISD::UNDEF &&
+      isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
+    return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
+                       DAG.getConstant(Imm, MVT::i32));
+  }
+
   // Check for Neon shuffles that modify both input vectors in place.
// If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ede4def2b7..3cf213cbff 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5140,23 +5140,25 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; } - -def VSETLNi8Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex8:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v16i8 QPR:$src1), - GPR:$R, imm:$lane))]>; -def VSETLNi16Q : PseudoNeonI<(outs QPR:$V), - (ins QPR:$src1, GPR:$R, VectorIndex16:$lane), - IIC_VMOVISL, "", - [(set QPR:$V, (vector_insert (v8i16 QPR:$src1), - GPR:$R, imm:$lane))]>; } - +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, - GPR:$src2, - (SSubReg_f32_reg imm:$lane)))>; + (v4i32 (INSERT_SUBREG QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e171f8b092..a5c0bc6740 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -223,6 +223,7 @@ def t_addrmode_sp : Operand<i32>, def t_addrmode_pc : Operand<i32> { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; + let PrintMethod = "printThumbLdrLabelOperand"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e10f4a865e..248bab6b12 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -159,7 +159,7 @@ def t2addrmode_imm12 : Operand<i32>, // t2ldrlabel := imm12 def t2ldrlabel : Operand<i32> { let EncoderMethod = "getAddrModeImm12OpValue"; - let PrintMethod = "printT2LdrLabelOperand"; + let PrintMethod = "printThumbLdrLabelOperand"; } def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";} @@ -3245,11 +3245,11 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{15-14} = 0b10; let Inst{12} = 1; - bits<20> target; + bits<24> target; let Inst{26} = target{19}; let Inst{11} = target{18}; let Inst{13} = target{17}; - let Inst{21-16} = target{16-11}; + let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index ed8ac1aff7..b0f576bc2b 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -49,6 +49,9 @@ def ssub_0 : SubRegIndex; def ssub_1 : 
SubRegIndex; def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; + +def gsub_0 : SubRegIndex; +def gsub_1 : SubRegIndex; // Let TableGen synthesize the remaining 12 ssub_* indices. // We don't need to name them. } @@ -313,6 +316,17 @@ def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], let AltOrderSelect = [{ return 1; }]; } +// Pseudo-registers representing even-odd pairs of GPRs from R1 to R13/SP. +// These are needed by instructions (e.g. ldrexd/strexd) requiring even-odd GPRs. +def Tuples2R : RegisterTuples<[gsub_0, gsub_1], + [(add R0, R2, R4, R6, R8, R10, R12), + (add R1, R3, R5, R7, R9, R11, SP)]>; + +// Register class representing a pair of even-odd GPRs. +def GPRPair : RegisterClass<"ARM", [untyped], 64, (add Tuples2R)> { + let Size = 64; // 2 x 32 bits, we have no predefined type of that size. +} + // Pseudo-registers representing 3 consecutive D registers. def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], [(shl DPR, 0), diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index cb3ac4d1f6..4c44f69f4d 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -155,8 +155,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, TargetLowering::ArgListEntry Entry; // First argument: data pointer - unsigned AS = DstPtrInfo.getAddrSpace(); - Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 740548adbc..fed2d99e65 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -75,7 +75,6 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S64") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "v128:64:128-v64:64:64-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), @@ -107,7 +106,6 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-" "v128:64:128-v64:64:64-a:0:32-n32-S32")), - ELFWriterInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget.hasThumb2() diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 25ab8295f0..cd6921e1ae 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -15,7 +15,6 @@ #define ARMTARGETMACHINE_H #include "ARMInstrInfo.h" -#include "ARMELFWriterInfo.h" #include "ARMFrameLowering.h" #include "ARMJITInfo.h" #include "ARMSubtarget.h" @@ -71,7 +70,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); ARMInstrInfo InstrInfo; const DataLayout DL; // Calculates type size & alignment - ARMELFWriterInfo ELFWriterInfo; ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; @@ -106,9 +104,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } - virtual const ARMELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; - } }; /// ThumbTargetMachine - Thumb target machine. 
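One detail worth calling out from the ARMRegisterInfo.td hunk above: the Tuples2R definition behind GPRPair zips its two register lists positionally, yielding seven even/odd pairs that end at (R12, SP). A standalone model of that pairing, purely illustrative; gprPair and the plain integer register numbers are assumptions, not LLVM's internal enums:

    #include <cassert>
    #include <utility>

    // Pair i couples R(2*i) with R(2*i+1); the last pair is (R12, SP),
    // SP being R13 in the ARM architectural numbering.
    static std::pair<int, int> gprPair(int i) {
      assert(i >= 0 && i < 7 && "Tuples2R defines seven even/odd pairs");
      return {2 * i, 2 * i + 1}; // pair 0 = (R0, R1), ..., pair 6 = (R12, SP)
    }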
@@ -120,7 +115,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
   // Either Thumb1InstrInfo or Thumb2InstrInfo.
   OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const DataLayout DL;   // Calculates type size & alignment
-  ARMELFWriterInfo ELFWriterInfo;
   ARMTargetLowering TLInfo;
   ARMSelectionDAGInfo TSInfo;
   // Either Thumb1FrameLowering or ARMFrameLowering.
@@ -162,9 +156,6 @@ public:
     return &VTTI;
   }
   virtual const DataLayout *getDataLayout() const { return &DL; }
-  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
-    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
-  }
 };
 } // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 0eec8622e9..c61e3bd99d 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -253,7 +253,8 @@ public:
   // Implementation of the MCTargetAsmParser interface:
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
-  bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                        SMLoc NameLoc,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands);
   bool ParseDirective(AsmToken DirectiveID);
@@ -4954,7 +4955,8 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
 static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
 
 /// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                    SMLoc NameLoc,
                                     SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   // Apply mnemonic aliases before doing anything else, as the destination
   // mnemonic may include suffixes and we want to handle them normally.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index bf74a9df3b..1ea4e00867 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_target(ARMCodeGen
   ARMCodeEmitter.cpp
   ARMConstantIslandPass.cpp
   ARMConstantPoolValue.cpp
-  ARMELFWriterInfo.cpp
   ARMExpandPseudoInsts.cpp
   ARMFastISel.cpp
   ARMFrameLowering.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index d2b1cc37f2..f00142de50 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2095,16 +2095,28 @@ static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder) {
-  DecodeStatus S = MCDisassembler::Success;
-  unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
-                 (fieldFromInstruction(Insn, 11, 1) << 18) |
-                 (fieldFromInstruction(Insn, 13, 1) << 17) |
-                 (fieldFromInstruction(Insn, 16, 6) << 11) |
-                 (fieldFromInstruction(Insn, 26, 1) << 19);
-  if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
+  DecodeStatus Status = MCDisassembler::Success;
+
+  // Note the J1 and J2 values are from the encoded instruction. So here
+  // change them to I1 and I2 values as documented:
+  //   I1 = NOT(J1 EOR S);
+  //   I2 = NOT(J2 EOR S);
+  // and build the imm32 with one trailing zero as documented:
+  //   imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32);
+  unsigned S = fieldFromInstruction(Insn, 26, 1);
+  unsigned J1 = fieldFromInstruction(Insn, 13, 1);
+  unsigned J2 = fieldFromInstruction(Insn, 11, 1);
+  unsigned I1 = !(J1 ^ S);
+  unsigned I2 = !(J2 ^ S);
+  unsigned imm10 = fieldFromInstruction(Insn, 16, 10);
+  unsigned imm11 = fieldFromInstruction(Insn, 0, 11);
+  unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11;
+  int imm32 = SignExtend32<24>(tmp << 1);
+  if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
                                 true, 4, Inst, Decoder))
-    Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
-  return S;
+    Inst.addOperand(MCOperand::CreateImm(imm32));
+
+  return Status;
 }
 
 static DecodeStatus
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 7dcc3da6c3..beeabb6d42 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -348,8 +348,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   }
 }
 
-void ARMInstPrinter::printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
+void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
   if (MO1.isExpr())
     O << *MO1.getExpr();
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 73d7bfd285..b7bab5fdcd 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -126,7 +126,8 @@ public:
   void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+                                 raw_ostream &O);
   void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 48df199437..a85acaaa14 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_llvm_library(LLVMTarget
   Mangler.cpp
   Target.cpp
-  TargetELFWriterInfo.cpp
   TargetInstrInfo.cpp
   TargetIntrinsicInfo.cpp
   TargetJITInfo.cpp
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 61fb4e98ec..0f3efd8345 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -476,11 +476,11 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
     unsigned index = PAL.getSlot(i).Index;
     AttrBuilder attrs(PAL.getSlot(i).Attrs);
     Out << "PAWI.Index = " << index << "U;\n";
-    Out << " AttrBuilder B;\n";
+    Out << " {\n AttrBuilder B;\n";
 
#define HANDLE_ATTR(X) \
   if (attrs.hasAttribute(Attributes::X)) \
-    Out << " B.addAttribute(Attributes::" #X ");\n"; \
+    Out << "  B.addAttribute(Attributes::" #X ");\n"; \
   attrs.removeAttribute(Attributes::X);
 
 HANDLE_ATTR(SExt);
@@ -507,13 +507,13 @@ HANDLE_ATTR(ReturnsTwice);
 HANDLE_ATTR(UWTable);
 HANDLE_ATTR(NonLazyBind);
+HANDLE_ATTR(MinSize);
#undef
HANDLE_ATTR if (attrs.hasAttribute(Attributes::StackAlignment)) - Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")"; - nl(Out); + Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n"; attrs.removeAttribute(Attributes::StackAlignment); assert(!attrs.hasAttributes() && "Unhandled attribute!"); - Out << "PAWI.Attrs = Attributes::get(mod->getContext(), B);"; + Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }"; nl(Out); Out << "Attrs.push_back(PAWI);"; nl(Out); diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index e472d490e0..a64c7a1816 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -56,6 +56,16 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, bits<1> isPredicated = 0; let TSFlags{6} = isPredicated; + // Dot new value store instructions. + bits<1> isNVStore = 0; + let TSFlags{8} = isNVStore; + + // Fields used for relation models. + string BaseOpcode = ""; + string CextOpcode = ""; + string PredSense = ""; + string PNewValue = ""; + string InputType = ""; // Input is "imm" or "reg" type. // *** The code above must match HexagonBaseInfo.h *** } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index c8f933dcf4..8435440308 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/MathExtras.h" #define GET_INSTRINFO_CTOR +#define GET_INSTRMAP_INFO #include "HexagonGenInstrInfo.inc" #include "HexagonGenDFAPacketizer.inc" @@ -1915,6 +1916,15 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int HexagonInstrInfo:: getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. switch(Opc) { case Hexagon::TFR: return !invertPredicate ? Hexagon::TFR_cPt : @@ -1934,24 +1944,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::JMP_EQriPt_nv_V4: return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 : Hexagon::JMP_EQriNotPt_nv_V4; - case Hexagon::ADD_ri: - return !invertPredicate ? Hexagon::ADD_ri_cPt : - Hexagon::ADD_ri_cNotPt; - case Hexagon::ADD_rr: - return !invertPredicate ? Hexagon::ADD_rr_cPt : - Hexagon::ADD_rr_cNotPt; - case Hexagon::XOR_rr: - return !invertPredicate ? Hexagon::XOR_rr_cPt : - Hexagon::XOR_rr_cNotPt; - case Hexagon::AND_rr: - return !invertPredicate ? Hexagon::AND_rr_cPt : - Hexagon::AND_rr_cNotPt; - case Hexagon::OR_rr: - return !invertPredicate ? Hexagon::OR_rr_cPt : - Hexagon::OR_rr_cNotPt; - case Hexagon::SUB_rr: - return !invertPredicate ? Hexagon::SUB_rr_cPt : - Hexagon::SUB_rr_cNotPt; case Hexagon::COMBINE_rr: return !invertPredicate ? 
Hexagon::COMBINE_rr_cPt : Hexagon::COMBINE_rr_cNotPt; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c0c0df6004..1d4a7060ad 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -15,6 +15,18 @@ include "HexagonInstrFormats.td" include "HexagonImmediates.td" //===----------------------------------------------------------------------===// +// Classes used for relation maps. +//===----------------------------------------------------------------------===// +// PredRel - Filter class used to relate non-predicated instructions with their +// predicated forms. +class PredRel; +// PredNewRel - Filter class used to relate predicated instructions with their +// predicate-new forms. +class PredNewRel: PredRel; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; +//===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. //===----------------------------------------------------------------------===// def HasV2T : Predicate<"Subtarget.hasV2TOps()">; @@ -148,37 +160,91 @@ multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { } //===----------------------------------------------------------------------===// -// ALU32/ALU + +// ALU32/ALU (Instructions with register-register form) //===----------------------------------------------------------------------===// -// Add. -let isCommutable = 1, isPredicable = 1 in -def ADD_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = add($src1, $src2)", - [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; +multiclass ALU32_Pbase<string mnemonic, bit isNot, + bit isPredNew> { -let isPredicable = 1 in -def ADD_ri : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s16Imm:$src2), - "$dst = add($src1, #$src2)", - [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), - s16ImmPred:$src2))]>; + let PNewValue = #!if(isPredNew, "new", "") in + def #NAME# : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2, $src3)", + []>; +} -// Logical operations. 
-let isPredicable = 1 in -def XOR_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = xor($src1, $src2)", - [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; +multiclass ALU32_Pred<string mnemonic, bit PredNot> { + let PredSense = #!if(PredNot, "false", "true") in { + defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>; + } +} -let isCommutable = 1, isPredicable = 1 in -def AND_rr : ALU32_rr<(outs IntRegs:$dst), +let InputType = "reg" in +multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in { + let isPredicable = 1 in + def #NAME# : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = and($src1, $src2)", - [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; + "$dst = "#mnemonic#"($src1, $src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<mnemonic, 0>; + defm NotPt : ALU32_Pred<mnemonic, 1>; + } + } +} + +let isCommutable = 1 in { + defm ADD_rr : ALU32_base<"add", "ADD", add>, ImmRegRel, PredNewRel; + defm AND_rr : ALU32_base<"and", "AND", and>, ImmRegRel, PredNewRel; + defm XOR_rr : ALU32_base<"xor", "XOR", xor>, ImmRegRel, PredNewRel; + defm OR_rr : ALU32_base<"or", "OR", or>, ImmRegRel, PredNewRel; +} + +defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; + +//===----------------------------------------------------------------------===// +// ALU32/ALU (ADD with register-immediate form) +//===----------------------------------------------------------------------===// +multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let PNewValue = #!if(isPredNew, "new", "") in + def #NAME# : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm: $src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2, #$src3)", + []>; +} + +multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { + let PredSense = #!if(PredNot, "false", "true") in { + defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>; + } +} + +let InputType = "imm" in +multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> { + let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in { + let isPredicable = 1 in + def #NAME# : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s16Imm:$src2), + "$dst = "#mnemonic#"($src1, #$src2)", + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), + (s16ImmPred:$src2)))]>; + + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32ri_Pred<mnemonic, 0>; + defm NotPt : ALU32ri_Pred<mnemonic, 1>; + } + } +} + +defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel; def OR_ri : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), @@ -197,13 +263,6 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst), [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), s10ImmPred:$src2))]>; -let isCommutable = 1, isPredicable = 1 in -def OR_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = or($src1, $src2)", - [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - // Negate. 
def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = neg($src1)", @@ -214,14 +273,6 @@ def NOP : ALU32_rr<(outs), (ins), "nop", []>; -// Subtract. -let isPredicable = 1 in -def SUB_rr : ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, IntRegs:$src2), - "$dst = sub($src1, $src2)", - [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), - (i32 IntRegs:$src2)))]>; - // Rd32=sub(#s10,Rs32) def SUB_ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), @@ -348,56 +399,6 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), // ALU32/PRED + //===----------------------------------------------------------------------===// -// Conditional add. -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if ($src1) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if (!$src1) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if ($src1.new) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), - "if (!$src1.new) $dst = add($src2, #$src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = add($src2, $src3)", - []>; - -let neverHasSideEffects = 1, isPredicated = 1 in -def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = add($src2, $src3)", - []>; - - // Conditional combine. let neverHasSideEffects = 1, isPredicated = 1 in @@ -424,108 +425,6 @@ def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), "if (!$src1.new) $dst = combine($src2, $src3)", []>; -// Conditional logical operations. 
- -let isPredicated = 1 in -def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = xor($src2, $src3)", - []>; - -let isPredicated = 1 in -def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = xor($src2, $src3)", - []>; - -let isPredicated = 1 in -def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = xor($src2, $src3)", - []>; - -let isPredicated = 1 in -def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = xor($src2, $src3)", - []>; - -let isPredicated = 1 in -def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = and($src2, $src3)", - []>; - -let isPredicated = 1 in -def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = and($src2, $src3)", - []>; - -let isPredicated = 1 in -def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = and($src2, $src3)", - []>; - -let isPredicated = 1 in -def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = and($src2, $src3)", - []>; - -let isPredicated = 1 in -def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = or($src2, $src3)", - []>; - -let isPredicated = 1 in -def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = or($src2, $src3)", - []>; - -let isPredicated = 1 in -def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = or($src2, $src3)", - []>; - -let isPredicated = 1 in -def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = or($src2, $src3)", - []>; - - -// Conditional subtract. - -let isPredicated = 1 in -def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = sub($src2, $src3)", - []>; - -let isPredicated = 1 in -def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = sub($src2, $src3)", - []>; - -let isPredicated = 1 in -def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = sub($src2, $src3)", - []>; - -let isPredicated = 1 in -def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = sub($src2, $src3)", - []>; - - // Conditional transfer. let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), @@ -3546,4 +3445,31 @@ include "HexagonInstrInfoV5.td" // V5 Instructions - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Generate mapping table to relate non-predicate instructions with their +// predicated formats - true and false. 
+// + +def getPredOpcode : InstrMapping { + let FilterClass = "PredRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["PredSense"]; + // The key column is the unpredicated instructions. + let KeyCol = [""]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["true"], ["false"]]; +} +//===----------------------------------------------------------------------===// +// Generate mapping table to relate predicated instructions with their .new +// format. +// +def getPredNewOpcode : InstrMapping { + let FilterClass = "PredNewRel"; + let RowFields = ["BaseOpcode", "PredSense", "isNVStore"]; + let ColFields = ["PNewValue"]; + let KeyCol = [""]; + let ValueCols = [["new"]]; +} diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 9e28a3d7d0..f7809caeb3 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -61,7 +61,8 @@ public: MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) : MCTargetAsmParser(), Parser(_Parser) {} - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -477,7 +478,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// Parse an mblaze instruction mnemonic followed by its operands. bool MBlazeAsmParser:: -ParseInstruction(StringRef Name, SMLoc NameLoc, +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The first operands is the token for the instruction name size_t dotLoc = Name.find('.'); diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 6c3e8b6447..0bf93d71da 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_target(MBlazeCodeGen MBlazeSelectionDAGInfo.cpp MBlazeAsmPrinter.cpp MBlazeMCInstLower.cpp - MBlazeELFWriterInfo.cpp ) add_dependencies(LLVMMBlazeCodeGen intrinsics_gen) diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp deleted file mode 100644 index 6b575099e5..0000000000 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp +++ /dev/null @@ -1,107 +0,0 @@ -//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the MBlaze backend. 
-// -//===----------------------------------------------------------------------===// - -#include "MBlazeELFWriterInfo.h" -#include "MBlazeRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the MBlazeELFWriterInfo class -//===----------------------------------------------------------------------===// - -MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64, - TM.getDataLayout()->isLittleEndian()) { -} - -MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {} - -unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch (MachineRelTy) { - case MBlaze::reloc_pcrel_word: - return ELF::R_MICROBLAZE_64_PCREL; - case MBlaze::reloc_absolute_word: - return ELF::R_MICROBLAZE_NONE; - default: - llvm_unreachable("unknown mblaze machine relocation type"); - } -} - -long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - switch (RelTy) { - case ELF::R_MICROBLAZE_32_PCREL: - return Modifier - 4; - case ELF::R_MICROBLAZE_32: - return Modifier; - default: - llvm_unreachable("unknown mblaze relocation type"); - } -} - -unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - // FIXME: Most of these sizes are guesses based on the name - switch (RelTy) { - case ELF::R_MICROBLAZE_32: - case ELF::R_MICROBLAZE_32_PCREL: - case ELF::R_MICROBLAZE_32_PCREL_LO: - case ELF::R_MICROBLAZE_32_LO: - case ELF::R_MICROBLAZE_SRO32: - case ELF::R_MICROBLAZE_SRW32: - case ELF::R_MICROBLAZE_32_SYM_OP_SYM: - case ELF::R_MICROBLAZE_GOTOFF_32: - return 32; - - case ELF::R_MICROBLAZE_64_PCREL: - case ELF::R_MICROBLAZE_64: - case ELF::R_MICROBLAZE_GOTPC_64: - case ELF::R_MICROBLAZE_GOT_64: - case ELF::R_MICROBLAZE_PLT_64: - case ELF::R_MICROBLAZE_GOTOFF_64: - return 64; - } - - return 0; -} - -bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - // FIXME: Most of these are guesses based on the name - switch (RelTy) { - case ELF::R_MICROBLAZE_32_PCREL: - case ELF::R_MICROBLAZE_64_PCREL: - case ELF::R_MICROBLAZE_32_PCREL_LO: - case ELF::R_MICROBLAZE_GOTPC_64: - return true; - } - - return false; -} - -unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - return MBlaze::reloc_absolute_word; -} - -long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - assert((RelTy == ELF::R_MICROBLAZE_32_PCREL || - RelTy == ELF::R_MICROBLAZE_64_PCREL) && - "computeRelocation unknown for this relocation type"); - return SymOffset - (RelOffset + 4); -} diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h deleted file mode 100644 index a314eb76ea..0000000000 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the MBlaze backend. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MBLAZE_ELF_WRITER_INFO_H -#define MBLAZE_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - class TargetMachine; - - class MBlazeELFWriterInfo : public TargetELFWriterInfo { - public: - MBlazeELFWriterInfo(TargetMachine &TM); - virtual ~MBlazeELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // MBLAZE_ELF_WRITER_INFO_H diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 1ae2baa198..f180652f11 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -41,7 +41,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, DL("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), + TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()), STTI(&TLInfo), VTTI(&TLInfo) { } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 34648b9b9a..a8df4e63e3 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -20,7 +20,6 @@ #include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" #include "MBlazeFrameLowering.h" -#include "MBlazeELFWriterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" @@ -38,7 +37,6 @@ namespace llvm { MBlazeTargetLowering TLInfo; MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; - MBlazeELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; ScalarTargetTransformImpl STTI; VectorTargetTransformImpl VTTI; @@ -77,9 +75,6 @@ namespace llvm { const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } - virtual const MBlazeELFWriterInfo *getELFWriterInfo() const { - return &ELFWriterInfo; - } virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { return &STTI; } virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 
113378a5f3..fc677aec38 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -881,7 +881,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = TD->getPointerSize(0); + uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -901,7 +901,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(0), MVT::i16); + DAG.getConstant(TD->getPointerSize(), MVT::i16); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 00649d2f18..67b524883c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -74,7 +74,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - bool ParseInstruction(StringRef Name, SMLoc NameLoc, + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool parseMathOperation(StringRef Name, SMLoc NameLoc, @@ -1056,7 +1057,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc, } bool MipsAsmParser:: -ParseInstruction(StringRef Name, SMLoc NameLoc, +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // floating point instructions: should register be treated as double? if (requestsDoubleOperand(Name)) { diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 3cc2821d47..ef56e752b2 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp - MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 056a74efc4..4e6b21feb5 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -41,6 +41,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust stack. if (isInt<16>(-StackSize)) BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); + + if (hasFP(MF)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) + .addReg(Mips::SP); + } void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, @@ -55,6 +60,10 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, if (!StackSize) return; + if (hasFP(MF)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::Move32R16), Mips::SP) + .addReg(Mips::S0); + // Adjust stack. if (isInt<16>(StackSize)) // assumes stacksize multiple of 8 @@ -106,8 +115,10 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - // FIXME: implement. - return true; + const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Reserve call frame if the size of the maximum call frame fits into 15-bit + // immediate field and there are no variable sized objects on the stack. 
+  return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
 }
 
 void Mips16FrameLowering::
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 5e33fed0cc..619646b317 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 
 Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
   : MipsInstrInfo(tm, Mips::BimmX16),
-    RI(*tm.getSubtargetImpl()) {}
+    RI(*tm.getSubtargetImpl(), *this) {}
 
 const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
   return RI;
@@ -126,7 +126,7 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   default:
     return false;
   case Mips::RetRA16:
-    ExpandRetRA16(MBB, MI, Mips::JrRa16);
+    ExpandRetRA16(MBB, MI, Mips::JrcRa16);
     break;
   }
@@ -160,6 +160,22 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
   return 0;
 }
 
+/// Adjust SP by Amount bytes.
+void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const {
+  DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+  if (isInt<16>(Amount)) {
+    if (Amount < 0)
+      BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)).addImm(-Amount);
+    else if (Amount > 0)
+      BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount);
+  }
+  else
+    // not implemented for large values yet
+    assert(false && "adjust stack pointer amount exceeded");
+}
+
 unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
   return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
           Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 260c5b69b2..e06ccfe61c 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -64,6 +64,10 @@ public:
 
   virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
 
+  /// Adjust SP by Amount bytes.
+  void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator I) const;
+
 private:
   virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 043b974232..5defc75ea6 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -11,11 +11,22 @@
 //
 //===----------------------------------------------------------------------===//
 //
+//
+// Mips Address
+//
+def addr16 :
+  ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
 //
 // Address operand
 def mem16 : Operand<i32> {
   let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops CPU16Regs, simm16, CPU16Regs);
+  let EncoderMethod = "getMemEncoding";
+}
+
+def mem16_ea : Operand<i32> {
+  let PrintMethod = "printMemOperandEA";
   let MIOperandInfo = (ops CPU16Regs, simm16);
   let EncoderMethod = "getMemEncoding";
 }
@@ -119,6 +130,16 @@ class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
                !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
 //
+//
+// EXT-RRI-A instruction format
+//
+
+class FEXT_RRI_A16_mem_ins<bits<1> op, string asmstr, Operand MemOpnd,
+                           InstrItinClass itin>:
+  FEXT_RRI_A16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+               !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
 // EXT-SHIFT instruction format
 //
 class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
@@ -194,6 +215,10 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
         !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
 }
 
+class FRRTR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry\n\tmove\t$rz, $$t8"), [], itin> ;
+
 //
 // maybe refactor but need a $zero as a dummy first parameter
 //
@@ -201,6 +226,11 @@ class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
   FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
         !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
 
+class FUnaryRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry"), [], itin> ;
+
+
 class FRR16_M_ins<bits<5> f, string asmstr, InstrItinClass itin> :
   FRR16<f, (outs CPU16Regs:$rx), (ins),
@@ -220,6 +250,12 @@ class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
   FRR16_JALRC<nd_, l_, 1, (outs), (ins),
               !strconcat(asmstr, "\t $$ra"), [], itin> ;
+
+class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
+                      string asmstr, InstrItinClass itin>:
+  FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
+              !strconcat(asmstr, "\t $rx"), [], itin> ;
+
 //
 // RRR-type instruction format
 //
@@ -229,6 +265,95 @@ class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
         !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>;
 
 //
+// These Sel patterns support the generation of conditional move
+// pseudo instructions.
+//
+// The nomenclature uses the components making up the pseudo and may
+// be a bit counterintuitive when compared with the end result we seek.
+// For example, using a beqz in the example directly below results in the
+// conditional move being done if the tested register is not zero.
+// I considered it easier to check by keeping the pseudo consistent with
+// its components, but it could have been done differently.
+//
+// The simplest case is when we can test an operand directly and do the
+// conditional move based on a simple mips16 conditional
+// branch instruction.
+// for example:
+// if $op == beqz or bnez:
+//
+//   $op1 $rt, .+4
+//   move $rd, $rs
+//
+// if $op == beqz, then if $rt != 0, then the conditional assignment
+// $rd = $rs is done.
+
+// if $op == bnez, then if $rt == 0, then the conditional assignment
+// $rd = $rs is done.
+//
+// So this pseudo class only has one operand, i.e., op
+//
+class Sel<bits<5> f1, string op, InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rt),
+                !strconcat(op, "\t$rt, .+4\n\t\n\tmove $rd, $rs"), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
+}
+
+//
+// The next two instruction classes allow for an operation which tests
+// two operands and returns a value in register T8 and
+// then does a conditional branch based on the value of T8
+//
+
+// op2 can be cmpi or slti/sltiu
+// op1 can be bteqz or btnez
+// the operands for op2 are a register and a signed constant
+//
+//   $op2 $t, $imm  ;test register t and branch conditionally
+//   $op1 .+4       ;op1 is a conditional branch
+//   move $rd, $rs
+//
+//
+class SeliT<bits<5> f1, string op1, bits<5> f2, string op2,
+            InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rl, simm16:$imm),
+                !strconcat(op2,
+                !strconcat("\t$rl, $imm\n\t",
+                !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
+}
+
+//
+// op2 can be cmp or slt/sltu
+// op1 can be bteqz or btnez
+// the operands for op2 are two registers
+// op1 is a conditional branch
+//
+//
+//   $op2 $rl, $rr  ;test registers rl,rr
+//   $op1 .+4       ;op1 is a conditional branch
+//   move $rd, $rs
+//
+//
+class SelT<bits<5> f1, string op1, bits<5> f2, string op2,
+           InstrItinClass itin>:
+  MipsInst16_32<(outs CPU16Regs:$rd_), (ins CPU16Regs:$rd, CPU16Regs:$rs,
+                CPU16Regs:$rl, CPU16Regs:$rr),
+                !strconcat(op2,
+                !strconcat("\t$rl, $rr\n\t",
+                !strconcat(op1, "\t.+4\n\tmove $rd, $rs"))), [], itin,
+                Pseudo16> {
+  let isCodeGenOnly=1;
+  let Constraints = "$rd = $rd_";
+}
+
+
 //
 // Some general instruction class info
 //
 //
@@ -269,6 +394,9 @@ def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
 def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
   ArithLogic16Defs<0>;
+def AddiuRxRyOffMemX16:
+  FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
+
 //
 // Format: ADDIU rx, pc, immediate MIPS16e
@@ -379,8 +507,27 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
 // address register.
 //
-def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  let hasDelaySlot = 1;
+  let isTerminator=1;
+  let isBarrier=1;
+}
+
+def JrcRa16: FRR16_JALRC_RA_only_ins<0, 0, "jrc", IIAlu> {
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  let isTerminator=1;
+  let isBarrier=1;
+}
+def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  let isTerminator=1;
+  let isBarrier=1;
+}
 //
 // Format: LB ry, offset(rx) MIPS16e
 // Purpose: Load Byte (Extended)
@@ -507,14 +654,14 @@ def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
 // Purpose: Negate
 // To negate an integer value.
 //
-def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>;
 
 //
 // Format: NOT rx, ry MIPS16e
 // Purpose: Not
 // To complement an integer value
 //
-def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>;
 
 //
 // Format: OR rx, ry MIPS16e
@@ -539,7 +686,17 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
 let ra=1, s=0,s0=1,s1=1 in
 def RestoreRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
+             "restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad {
   let isCodeGenOnly = 1;
 }
+
+// Use Restore to increment SP since SP is not a Mips16 register; this
+// is an easy way to do that which does not require a register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def RestoreIncSpF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "restore\t$frame_size", [], IILoad >, MayLoad {
+  let isCodeGenOnly = 1;
+}
@@ -553,7 +710,18 @@ def RestoreRaF16:
 let ra=1, s=1,s0=1,s1=1 in
 def SaveRaF16:
   FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
-             "save \t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
+             "save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore {
   let isCodeGenOnly = 1;
 }
+
+//
+// Use Save to decrement the SP by a constant since SP is not
+// a Mips16 register.
+//
+let ra=0, s=0,s0=0,s1=0 in
+def SaveDecSpF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "save\t$frame_size", [], IIStore >, MayStore {
+  let isCodeGenOnly = 1;
+}
 //
@@ -565,6 +733,120 @@ def SbRxRyOffMemX16:
   FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
 //
+// The Sel(T) instructions are pseudos
+// T means that they use T8 implicitly.
+//
+//
+// Format: SelBeqZ rd, rs, rt
+// Purpose: if rt==0, do nothing
+//          else rd = rs
+//
+def SelBeqZ: Sel<0b00100, "beqz", IIAlu>;
+
+//
+// Format: SelTBteqZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZCmp: SelT<0b000, "bteqz", 0b01010, "cmp", IIAlu>;
+
+//
+// Format: SelTBteqZCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZCmpi: SeliT<0b000, "bteqz", 0b01110, "cmpi", IIAlu>;
+
+//
+// Format: SelTBteqZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSlt: SelT<0b000, "bteqz", 0b00010, "slt", IIAlu>;
+
+//
+// Format: SelTBteqZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSlti: SeliT<0b000, "bteqz", 0b01010, "slti", IIAlu>;
+
+//
+// Format: SelTBteqZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSltu: SelT<0b000, "bteqz", 0b00011, "sltu", IIAlu>;
+
+//
+// Format: SelTBteqZSltiu rd, rs, rl, rr
+// Purpose: b = Sltiu rl, imm.
+//          If b==0 then do nothing.
+//          if b!=0 then rd = rs
+//
+def SelTBteqZSltiu: SeliT<0b000, "bteqz", 0b01011, "sltiu", IIAlu>;
+
+//
+// Format: SelBneZ rd, rs, rt
+// Purpose: if rt!=0, do nothing
+//          else rd = rs
+//
+def SelBneZ: Sel<0b00101, "bnez", IIAlu>;
+
+//
+// Format: SelTBtneZCmp rd, rs, rl, rr
+// Purpose: b = Cmp rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZCmp: SelT<0b001, "btnez", 0b01010, "cmp", IIAlu>;
+
+//
+// Format: SelTBtneZCmpi rd, rs, rl, rr
+// Purpose: b = Cmpi rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZCmpi: SeliT<0b000, "btnez", 0b01110, "cmpi", IIAlu>;
+
+//
+// Format: SelTBtneZSlt rd, rs, rl, rr
+// Purpose: b = Slt rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSlt: SelT<0b001, "btnez", 0b00010, "slt", IIAlu>;
+
+//
+// Format: SelTBtneZSlti rd, rs, rl, rr
+// Purpose: b = Slti rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSlti: SeliT<0b001, "btnez", 0b01010, "slti", IIAlu>;
+
+//
+// Format: SelTBtneZSltu rd, rs, rl, rr
+// Purpose: b = Sltu rl, rr.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSltu: SelT<0b001, "btnez", 0b00011, "sltu", IIAlu>;
+
+//
+// Format: SelTBtneZSltiu rd, rs, rl, rr
+// Purpose: b = Sltiu rl, imm.
+//          If b!=0 then do nothing.
+//          if b==0 then rd = rs
+//
+def SelTBtneZSltiu: SeliT<0b001, "btnez", 0b01011, "sltiu", IIAlu>;
+//
+//
 // Format: SH ry, offset(rx) MIPS16e
 // Purpose: Store Halfword (Extended)
 // To store a halfword to memory.
@@ -613,6 +895,9 @@ def SltCCRxRy16: FCCRR16_ins<0b00010, "slt", IIAlu>;
 // Purpose: Set on Less Than Unsigned
 // To record the result of an unsigned less-than comparison.
 //
+def SltuRxRyRz16: FRRTR16_ins<0b00011, "sltu", IIAlu> {
+  let isCodeGenOnly=1;
+}
 def SltuCCRxRy16: FCCRR16_ins<0b00011, "sltu", IIAlu>;
 
@@ -725,7 +1010,7 @@ def: shift_rotate_reg16_pat<sra, SravRxRy16>;
 def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
 
 class LoadM16_pat<PatFrag OpNode, Instruction I> :
-  Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>;
+  Mips16Pat<(OpNode addr16:$addr), (I addr16:$addr)>;
 
 def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
 def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
@@ -734,7 +1019,8 @@ def: LoadM16_pat<zextloadi16, LhuRxRyOffMemX16>;
 def: LoadM16_pat<load, LwRxRyOffMemX16>;
 
 class StoreM16_pat<PatFrag OpNode, Instruction I> :
-  Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>;
+  Mips16Pat<(OpNode CPU16Regs:$r, addr16:$addr),
+            (I CPU16Regs:$r, addr16:$addr)>;
 
 def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
 def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>;
@@ -746,11 +1032,17 @@ class UncondBranch16_pat<SDNode OpNode, Instruction I>:
   let Predicates = [RelocPIC, InMips16Mode];
 }
 
+// Indirect branch
+def: Mips16Pat<
+  (brind CPU16Regs:$rs),
+  (JrcRx16 CPU16Regs:$rs)>;
+
+
 // Jump and Link (Call)
-let isCall=1, hasDelaySlot=1 in
+let isCall=1, hasDelaySlot=0 in
 def JumpLinkReg16:
   FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
-              "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+              "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
 
 // Mips16 pseudos
 let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
@@ -766,8 +1058,25 @@ class SetCC_R16<PatFrag cond_op, Instruction I>:
 
 class SetCC_I16<PatFrag cond_op, PatLeaf imm_type, Instruction I>:
   Mips16Pat<(cond_op CPU16Regs:$rx, imm_type:$imm16),
-            (I CPU16Regs:$rx, imm_type:$imm16)>;
+            (I CPU16Regs:$rx, imm_type:$imm16)>;
+
+
+def: Mips16Pat<(i32 addr16:$addr),
+               (AddiuRxRyOffMemX16 addr16:$addr)>;
+
+
+// Large (>16 bit) immediate loads
+def : Mips16Pat<(i32 imm:$imm),
+                (OrRxRxRy16 (SllX16 (LiRxImmX16 (HI16 imm:$imm)), 16),
+                (LiRxImmX16 (LO16 imm:$imm)))>;
+
+// Carry MipsPatterns
+def : Mips16Pat<(subc CPU16Regs:$lhs, CPU16Regs:$rhs),
+                (SubuRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>;
+def : Mips16Pat<(addc CPU16Regs:$lhs, CPU16Regs:$rhs),
+                (AdduRxRyRz16 CPU16Regs:$lhs, CPU16Regs:$rhs)>;
+def : Mips16Pat<(addc CPU16Regs:$src, immSExt16:$imm),
+                (AddiuRxRxImmX16 CPU16Regs:$src, imm:$imm)>;
 
 //
 // Some branch conditional patterns are not generated by llvm at this time.
@@ -922,6 +1231,180 @@ def: Mips16Pat
   <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
    (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
 
+// signed a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y
+//
+def : Mips16Pat<(select (i32 (setge CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a, CPU16Regs:$b)>;
+
+// signed a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setgt CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+// unsigned a,b
+// x = (a>=b)?x:y
+//
+// if !(a < b) x = y;
+//
+def : Mips16Pat<
+  (select (i32 (setuge CPU16Regs:$a, CPU16Regs:$b)),
+          CPU16Regs:$x, CPU16Regs:$y),
+  (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+   CPU16Regs:$a, CPU16Regs:$b)>;
+
+// unsigned a,b
+// x = (a>b)?x:y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setugt CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZSltu CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+// signed
+// x = (a >= k)?x:y
+// due to an LLVM optimization, I don't think that this will ever
+// be used. This is transformed into x = (a > k-1)?x:y
+//
+//
+
+//def : Mips16Pat<
+//  (select (i32 (setge CPU16Regs:$lhs, immSExt16:$rhs)),
+//          CPU16Regs:$T, CPU16Regs:$F),
+//  (SelTBteqZSlti CPU16Regs:$T, CPU16Regs:$F,
+//   CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+//def : Mips16Pat<
+//  (select (i32 (setuge CPU16Regs:$lhs, immSExt16:$rhs)),
+//          CPU16Regs:$T, CPU16Regs:$F),
+//  (SelTBteqZSltiu CPU16Regs:$T, CPU16Regs:$F,
+//   CPU16Regs:$lhs, immSExt16:$rhs)>;
+
+// signed
+// x = (a < k)?x:y
+//
+// if !(a < k) x = y;
+//
+def : Mips16Pat<
+  (select (i32 (setlt CPU16Regs:$a, immSExt16:$b)),
+          CPU16Regs:$x, CPU16Regs:$y),
+  (SelTBtneZSlti CPU16Regs:$x, CPU16Regs:$y,
+   CPU16Regs:$a, immSExt16:$b)>;
+
+
+//
+//
+// signed
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setle CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSlt CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// unsigned
+// x = (a <= b)? x : y
+//
+// if (b < a) x = y
+//
+def : Mips16Pat<(select (i32 (setule CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZSltu CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == b)? x : y
+//
+// if (a != b) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZCmp CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a == 0)? x : y
+//
+// if (a != 0) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, 0)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelBeqZ CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a)>;
+
+
+//
+// signed/unsigned
+// x = (a == k)? x : y
+//
+// if (a != k) x = y
+//
+def : Mips16Pat<(select (i32 (seteq CPU16Regs:$a, immZExt16:$k)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBteqZCmpi CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$a, immZExt16:$k)>;
+
+
+//
+// signed/unsigned
+// x = (a != b)? x : y
+//
+// if (a == b) x = y
+//
+//
+def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, CPU16Regs:$b)),
+                        CPU16Regs:$x, CPU16Regs:$y),
+                (SelTBtneZCmp CPU16Regs:$x, CPU16Regs:$y,
+                 CPU16Regs:$b, CPU16Regs:$a)>;
+
+//
+// signed/unsigned
+// x = (a != 0)?
x : y +// +// if (a == 0) x = y +// +def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, 0)), + CPU16Regs:$x, CPU16Regs:$y), + (SelBneZ CPU16Regs:$x, CPU16Regs:$y, + CPU16Regs:$a)>; + +// signed/unsigned +// x = (a)? x : y +// +// if (!a) x = y +// +def : Mips16Pat<(select CPU16Regs:$a, + CPU16Regs:$x, CPU16Regs:$y), + (SelBneZ CPU16Regs:$x, CPU16Regs:$y, + CPU16Regs:$a)>; + + +// +// signed/unsigned +// x = (a != k)? x : y +// +// if (a == k) x = y +// +def : Mips16Pat<(select (i32 (setne CPU16Regs:$a, immZExt16:$k)), + CPU16Regs:$x, CPU16Regs:$y), + (SelTBtneZCmpi CPU16Regs:$x, CPU16Regs:$y, + CPU16Regs:$a, immZExt16:$k)>; // // When writing C code to test setxx these patterns, @@ -1013,7 +1496,7 @@ def: Mips16Pat //def: Mips16Pat // <(setuge CPU16Regs:$lhs, immZExt16:$rhs), // (XorRxRxRy16 (SltiuCCRxImmX16 CPU16Regs:$lhs, immZExt16:$rhs), -// (LiRxImmX16 1))>; +// (LiRxImmX16 1))>; // // setugt @@ -1038,3 +1521,22 @@ def: SetCC_I16<setult, immSExt16, SltiuCCRxImmX16>; def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; + +// hi/lo relocs + +def : Mips16Pat<(MipsHi tglobaltlsaddr:$in), + (SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>; + +// wrapper_pic +class Wrapper16Pat<SDNode node, Instruction ADDiuOp, RegisterClass RC>: + Mips16Pat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; + + +def : Wrapper16Pat<tglobaladdr, AddiuRxRxImmX16, CPU16Regs>; +def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>; + +def : Mips16Pat<(i32 (extloadi8 addr16:$src)), + (LbuRxRyOffMemX16 addr16:$src)>; +def : Mips16Pat<(i32 (extloadi16 addr16:$src)), + (LhuRxRyOffMemX16 addr16:$src)>;
\ No newline at end of file diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index bfc6b6cabf..d7397a32f0 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Mips16RegisterInfo.h" +#include "Mips16InstrInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" @@ -38,15 +39,28 @@ using namespace llvm; -Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST) - : MipsRegisterInfo(ST) {} +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, + const Mips16InstrInfo &I) + : MipsRegisterInfo(ST), TII(I) {} // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void Mips16RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const Mips16InstrInfo *II = static_cast<const Mips16InstrInfo*>(&TII); + + II->adjustStackPtr(Mips::SP, Amount, MBB, I); + } + MBB.erase(I); } @@ -54,51 +68,60 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, int64_t SPOffset) const { - MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always - // referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always + // referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + FrameReg = Mips::SP; + else { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (TFI->hasFP(MF)) { + FrameReg = Mips::S0; + } + else { + if ((MI.getNumOperands()> OpNo+2) && MI.getOperand(OpNo+2).isReg()) + FrameReg = MI.getOperand(OpNo+2).getReg(); else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. 
- // - There is no need to change the offset if the frame object - // is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, - // its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - int64_t Offset; - - Offset = SPOffset + (int64_t)StackSize; - Offset += MI.getOperand(OpNo + 1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); - MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); + FrameReg = Mips::SP; + } + } + // Calculate final offset. + // - There is no need to change the offset if the frame object + // is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, + // its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); + + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); } diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index c702a15f60..153def20d0 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -20,8 +20,9 @@ namespace llvm { class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { + const Mips16InstrInfo &TII; public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget); + Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index ed0ea0e849..a6111689c7 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -86,7 +86,7 @@ let DecoderNamespace = "Mips64" in { def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16, CPU64Regs>; def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, - CPU64Regs>; + CPU64Regs>, IsAsCheapAsAMove; def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>; def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index b45e7af102..9bb39a424c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -250,8 +250,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); } } @@ -262,9 +261,7 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. 
if (OutStreamer.hasRawTextSupport()) { - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tat")); - + OutStreamer.EmitRawText(StringRef("\t.set\tat")); OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 19213fa673..78cf140def 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -35,9 +35,6 @@ def RetCC_MipsO32 : CallingConv<[ //===----------------------------------------------------------------------===// def CC_MipsN : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCCustom<"CC_Mips64Byval">>, - // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -72,9 +69,6 @@ def CC_MipsN : CallingConv<[ // N32/64 variable arguments. // All arguments are passed in integer registers. def CC_MipsN_VarArg : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCCustom<"CC_Mips64Byval">>, - // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -211,12 +205,6 @@ def CC_Mips_FastCC : CallingConv<[ // Mips Calling Convention Dispatch //===----------------------------------------------------------------------===// -def CC_Mips : CallingConv<[ - CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>, - CCIfSubtarget<"isABI_N32()", CCDelegateTo<CC_MipsN>>, - CCIfSubtarget<"isABI_N64()", CCDelegateTo<CC_MipsN>> -]>; - def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp deleted file mode 100644 index 786e6554f0..0000000000 --- a/lib/Target/Mips/MipsELFWriterInfo.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the Mips backend. 
-// -//===----------------------------------------------------------------------===// - -#include "MipsELFWriterInfo.h" -#include "MipsRelocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ELF.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the MipsELFWriterInfo class -//===----------------------------------------------------------------------===// - -MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_) - : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { - EMachine = EM_MIPS; -} - -MipsELFWriterInfo::~MipsELFWriterInfo() {} - -unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - switch(MachineRelTy) { - case Mips::reloc_mips_pc16: - return ELF::R_MIPS_GOT16; - case Mips::reloc_mips_hi: - return ELF::R_MIPS_HI16; - case Mips::reloc_mips_lo: - return ELF::R_MIPS_LO16; - case Mips::reloc_mips_26: - return ELF::R_MIPS_26; - default: - llvm_unreachable("unknown Mips machine relocation type"); - } -} - -long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - switch(RelTy) { - case ELF::R_MIPS_26: return Modifier; - default: - llvm_unreachable("unknown Mips relocation type"); - } -} - -unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - switch(RelTy) { - case ELF::R_MIPS_GOT16: - case ELF::R_MIPS_26: - return 32; - default: - llvm_unreachable("unknown Mips relocation type"); - } -} - -bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - switch(RelTy) { - case ELF::R_MIPS_GOT16: - return true; - case ELF::R_MIPS_26: - return false; - default: - llvm_unreachable("unknown Mips relocation type"); - } -} - -unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - return Mips::reloc_mips_26; -} - -long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - - if (RelTy == ELF::R_MIPS_GOT16) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); -} diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h deleted file mode 100644 index 23f3f03b35..0000000000 --- a/lib/Target/Mips/MipsELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the Mips backend. -// -//===----------------------------------------------------------------------===// - -#ifndef MIPS_ELF_WRITER_INFO_H -#define MIPS_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - - class MipsELFWriterInfo : public TargetELFWriterInfo { - - public: - MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_); - virtual ~MipsELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. 
- virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // MIPS_ELF_WRITER_INFO_H diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 6dd251c054..2cad2a6264 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -98,3 +98,37 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } + +uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); + + int64_t Offset = 0; + + // Iterate over fixed sized objects. + for (int I = MFI->getObjectIndexBegin(); I != 0; ++I) + Offset = std::max(Offset, -MFI->getObjectOffset(I)); + + // Conservatively assume all callee-saved registers will be saved. + for (const uint16_t *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { + unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); + Offset = RoundUpToAlignment(Offset + Size, Size); + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Check that MaxAlign is not zero if there is a stack object that is not a + // callee-saved spill. + assert(!MFI->getObjectIndexEnd() || MaxAlign); + + // Iterate over other objects. + for (unsigned I = 0, E = MFI->getObjectIndexEnd(); I != E; ++I) + Offset = RoundUpToAlignment(Offset + MFI->getObjectSize(I), MaxAlign); + + // Call frame. + if (MFI->adjustsStack() && hasReservedCallFrame(MF)) + Offset = RoundUpToAlignment(Offset + MFI->getMaxCallFrameSize(), + std::max(MaxAlign, getStackAlignment())); + + return RoundUpToAlignment(Offset, getStackAlignment()); +} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index ed7b7fe76c..df52d92da8 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -34,6 +34,9 @@ public: const MipsSubtarget &ST); bool hasFP(const MachineFunction &MF) const; + +protected: + uint64_t estimateStackSize(const MachineFunction &MF) const; }; /// Create MipsInstrInfo objects. 
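Reviewer note on the estimateStackSize hook added to MipsFrameLowering above: it is a conservative accumulation of rounded object sizes, not an exact frame layout. The following standalone C++ sketch traces that arithmetic under made-up inputs (two 4-byte callee saves, one 24-byte local with 8-byte alignment, a 16-byte reserved call frame); RoundUpToAlignment is a local stand-in for the llvm/Support/MathExtras.h helper of the same name, so this is an illustration, not backend code:

#include <algorithm>
#include <cstdint>

// Local stand-in for llvm::RoundUpToAlignment (Support/MathExtras.h).
static uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  uint64_t Offset = 0;
  Offset = RoundUpToAlignment(Offset + 4, 4);  // first callee save  -> 4
  Offset = RoundUpToAlignment(Offset + 4, 4);  // second callee save -> 8
  Offset = RoundUpToAlignment(Offset + 24, 8); // 24-byte local      -> 32
  // Reserved call frame, rounded to max(MaxAlign, stack alignment) = 8.
  Offset = RoundUpToAlignment(Offset + 16, std::max<uint64_t>(8, 8)); // -> 48
  return RoundUpToAlignment(Offset, 8) == 48 ? 0 : 1; // final estimate: 48
}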
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index f99ff15157..778fe34275 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,6 +86,10 @@ private: SDNode *getGlobalBaseReg(); + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); + std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, bool HasLo, bool HasHi); @@ -94,6 +98,9 @@ private: // Complex Pattern. bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Alias); + // getImm - Return a target constant with the specified value. inline SDValue getImm(const SDNode *Node, unsigned Imm) { return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); @@ -102,6 +109,7 @@ private: void ProcessFunctionAfterISel(MachineFunction &MF); bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); void InitGlobalBaseReg(MachineFunction &MF); + void InitMips16SPAliasReg(MachineFunction &MF); virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, @@ -220,6 +228,26 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addReg(Mips::V0).addReg(Mips::T9); } +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + + bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr& MI) { unsigned DstReg = 0, ZeroReg = 0; @@ -260,6 +288,7 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { InitGlobalBaseReg(MF); + InitMips16SPAliasReg(MF); MachineRegisterInfo *MRI = &MF.getRegInfo(); @@ -284,6 +313,14 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: @@ -362,6 +399,115 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? 
+ AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} +bool MipsDAGToDAGISel::SelectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasMips32r2Or64()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + /// Select multiply instructions. 
std::pair<SDNode*, SDNode*> MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, @@ -413,6 +559,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { case ISD::SUBE: case ISD::ADDE: { + bool inMips16Mode = Subtarget.inMips16Mode(); SDValue InFlag = Node->getOperand(2), CmpLHS; unsigned Opc = InFlag.getOpcode(); (void)Opc; assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || @@ -422,10 +569,16 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned MOp; if (Opcode == ISD::ADDE) { CmpLHS = InFlag.getValue(0); - MOp = Mips::ADDu; + if (inMips16Mode) + MOp = Mips::AdduRxRyRz16; + else + MOp = Mips::ADDu; } else { CmpLHS = InFlag.getOperand(0); - MOp = Mips::SUBu; + if (inMips16Mode) + MOp = Mips::SubuRxRyRz16; + else + MOp = Mips::SUBu; } SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; @@ -434,8 +587,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); - SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, + + unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); + unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, SDValue(Carry,0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ae89cdd693..a8810d238c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -46,6 +46,20 @@ static cl::opt<bool> EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, cl::desc("MIPS: Enable tail calls."), cl::init(false)); +static const uint16_t O32IntRegs[4] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 +}; + +static const uint16_t Mips64IntRegs[8] = { + Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, + Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64 +}; + +static const uint16_t Mips64DPRegs[8] = { + Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, + Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64 +}; + // If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
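For readers tracing the shifted-mask comment above: the predicate it describes accepts exactly the values that are a single contiguous run of ones, shifted left by some amount. A minimal self-contained sketch of that test follows; isShiftedMask here is a local reimplementation for illustration, not the helper the backend actually calls:

#include <cassert>
#include <cstdint>

// True if I is one contiguous run of ones; reports the run's starting
// bit position (Pos) and its length (Size).
static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  if (I == 0)
    return false;
  Pos = __builtin_ctzll(I);         // index of the lowest set bit
  uint64_t Run = I >> Pos;
  if (Run & (Run + 1))              // a mask m of N ones has m & (m+1) == 0
    return false;
  Size = 64 - __builtin_clzll(Run); // number of ones in the run
  return true;
}

int main() {
  uint64_t Pos, Size;
  // Matches the example above: 0x003ff800 -> (Pos, Size) = (11, 11).
  assert(isShiftedMask(0x003ff800, Pos, Size) && Pos == 11 && Size == 11);
  return 0;
}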
@@ -198,8 +212,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (Subtarget->inMips16Mode()) { + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + } + else { + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + } if (!Subtarget->inMips16Mode()) { setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); @@ -306,6 +326,21 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + if (Subtarget->inMips16Mode()) { + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + } + setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -2603,16 +2638,9 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, Mips::D6, Mips::D7 }; - // ByVal Args - if (ArgFlags.isByVal()) { - State.HandleByVal(ValNo, ValVT, LocVT, LocInfo, - 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags); - unsigned NextReg = (State.getNextStackOffset() + 3) / 4; - for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize); - r < std::min(IntRegsSize, NextReg); ++r) - State.AllocateReg(IntRegs[r]); - return false; - } + // Do not process byval args here. 
+ if (ArgFlags.isByVal()) + return true; // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { @@ -2667,296 +2695,68 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, } else llvm_unreachable("Cannot handle this ValVT."); - unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; - unsigned Offset; - if (!ArgFlags.isSRet()) - Offset = State.AllocateStack(SizeInBytes, OrigAlign); - else - Offset = State.AllocateStack(SizeInBytes, SizeInBytes); - - if (!Reg) + if (!Reg) { + unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() >> 3, + OrigAlign); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - else + } else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; // CC must always match -} - -static const uint16_t Mips64IntRegs[8] = - {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, - Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; -static const uint16_t Mips64DPRegs[8] = - {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, - Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; - -static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8); - unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8; - unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8); - - assert(Align <= 16 && "Cannot handle alignments larger than 16."); - - // If byval is 16-byte aligned, the first arg register must be even. - if ((Align == 16) && (FirstIdx % 2)) { - State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]); - ++FirstIdx; - } - - // Mark the registers allocated. - for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I) - State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]); - - // Allocate space on caller's stack. - unsigned Offset = State.AllocateStack(Size, Align); - - if (FirstIdx < 8) - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx], - LocVT, LocInfo)); - else - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - - return true; + return false; } #include "MipsGenCallingConv.inc" -static void -AnalyzeMips64CallOperands(CCState &CCInfo, - const SmallVectorImpl<ISD::OutputArg> &Outs) { - unsigned NumOps = Outs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = Outs[i].VT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - bool R; - - if (Outs[i].IsFixed) - R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - else - R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - - if (R) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// static const unsigned O32IntRegsSize = 4; -static const uint16_t O32IntRegs[] = { - Mips::A0, Mips::A1, Mips::A2, Mips::A3 -}; - // Return next O32 integer argument register. static unsigned getNextIntArgReg(unsigned Reg) { assert((Reg == Mips::A0) || (Reg == Mips::A2)); return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -// Write ByVal Arg to arg registers and stack. 
-static void -WriteByValArg(SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, - MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - MVT PtrType, bool isLittle) { - unsigned LocMemOffset = VA.getLocMemOffset(); - unsigned Offset = 0; - uint32_t RemainingSize = Flags.getByValSize(); - unsigned ByValAlign = Flags.getByValAlign(); - - // Copy the first 4 words of byval arg to registers A0 - A3. - // FIXME: Use a stricter alignment if it enables better optimization in passes - // run later. - for (; RemainingSize >= 4 && LocMemOffset < 4 * 4; - Offset += 4, RemainingSize -= 4, LocMemOffset += 4) { - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, - MachinePointerInfo(), false, false, false, - std::min(ByValAlign, (unsigned )4)); - MemOpChains.push_back(LoadVal.getValue(1)); - unsigned DstReg = O32IntRegs[LocMemOffset / 4]; - RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); - } - - if (RemainingSize == 0) - return; - - // If there still is a register available for argument passing, write the - // remaining part of the structure to it using subword loads and shifts. - if (LocMemOffset < 4 * 4) { - assert(RemainingSize <= 3 && RemainingSize >= 1 && - "There must be one to three bytes remaining."); - unsigned LoadSize = (RemainingSize == 3 ? 2 : RemainingSize); - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - unsigned Alignment = std::min(ByValAlign, (unsigned )4); - SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, - LoadPtr, MachinePointerInfo(), - MVT::getIntegerVT(LoadSize * 8), false, - false, Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - - // If target is big endian, shift it to the most significant half-word or - // byte. - if (!isLittle) - LoadVal = DAG.getNode(ISD::SHL, dl, MVT::i32, LoadVal, - DAG.getConstant(32 - LoadSize * 8, MVT::i32)); - - Offset += LoadSize; - RemainingSize -= LoadSize; - - // Read second subword if necessary. - if (RemainingSize != 0) { - assert(RemainingSize == 1 && "There must be one byte remaining."); - LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - unsigned Alignment = std::min(ByValAlign, (unsigned )2); - SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, - LoadPtr, MachinePointerInfo(), - MVT::i8, false, false, Alignment); - MemOpChains.push_back(Subword.getValue(1)); - // Insert the loaded byte to LoadVal. - // FIXME: Use INS if supported by target. - unsigned ShiftAmt = isLittle ? 16 : 8; - SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i32, Subword, - DAG.getConstant(ShiftAmt, MVT::i32)); - LoadVal = DAG.getNode(ISD::OR, dl, MVT::i32, LoadVal, Shift); - } - - unsigned DstReg = O32IntRegs[LocMemOffset / 4]; - RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); - return; - } - - // Copy remaining part of byval arg using memcpy. 
- SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, - DAG.getConstant(Offset, MVT::i32)); - SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, - DAG.getIntPtrConstant(LocMemOffset)); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); -} - -// Copy Mips64 byVal arg to registers and stack. -void static -PassByValArg64(SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, - MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - EVT PtrTy, bool isLittle) { - unsigned ByValSize = Flags.getByValSize(); - unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); - bool IsRegLoc = VA.isRegLoc(); - unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. - unsigned LocMemOffset = 0; - unsigned MemCpySize = ByValSize; - - if (!IsRegLoc) - LocMemOffset = VA.getLocMemOffset(); - else { - const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, - VA.getLocReg()); - const uint16_t *RegEnd = Mips64IntRegs + 8; - - // Copy double words to registers. - for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr, - MachinePointerInfo(), false, false, false, - Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); - } - - // Return if the struct has been fully copied. - if (!(MemCpySize = ByValSize - Offset)) - return; - - // If there is an argument register available, copy the remainder of the - // byval argument with sub-doubleword loads and shifts. - if (Reg != RegEnd) { - assert((ByValSize < Offset + 8) && - "Size of the remainder should be smaller than 8-byte."); - SDValue Val; - for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) { - unsigned RemSize = ByValSize - Offset; - - if (RemSize < LoadSize) - continue; - - SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = - DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr, - MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), - false, false, Alignment); - MemOpChains.push_back(LoadVal.getValue(1)); - - // Offset in number of bits from double word boundary. - unsigned OffsetDW = (Offset % 8) * 8; - unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8); - SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal, - DAG.getConstant(Shamt, MVT::i32)); - - Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) : - Shift; - Offset += LoadSize; - Alignment = std::min(Alignment, LoadSize); - } - - RegsToPass.push_back(std::make_pair(*Reg, Val)); - return; - } - } - - assert(MemCpySize && "MemCpySize must not be zero."); - - // Copy remainder of byval arg to it with memcpy. 
- SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, - DAG.getIntPtrConstant(LocMemOffset)); - Chain = DAG.getMemcpy(Chain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - MemOpChains.push_back(Chain); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. bool MipsTargetLowering:: -IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC, - unsigned NextStackOffset) const { +IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { if (!EnableMipsTailCalls) return false; - // Do not tail-call optimize if there is an argument passed on stack. - if (IsO32 && (CalleeCC != CallingConv::Fast)) { - if (NextStackOffset > 16) - return false; - } else if (NextStackOffset) + // No tail call optimization for mips16. + if (Subtarget->inMips16Mode()) return false; - return true; + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's next stack offset is no larger than the + // caller's. + return NextStackOffset <= FI.nextStackOffset(); +} + +SDValue +MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, + SDValue Chain, SDValue Arg, DebugLoc DL, + bool IsTailCall, SelectionDAG &DAG) const { + if (!IsTailCall) { + SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(Offset)); + return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, + false, 0); + } + + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), + /*isVolatile=*/ true, false, 0); } /// LowerCall - functions arguments are copied from virtual regs to @@ -2984,30 +2784,18 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); - if (CallConv == CallingConv::Fast) - CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC); - else if (IsO32) - CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); - else if (HasMips64) - AnalyzeMips64CallOperands(CCInfo, Outs); - else - CCInfo.AnalyzeCallOperands(Outs, CC_Mips); + MipsCCInfo.analyzeCallOperands(Outs); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); - - // Update size of the maximum argument space. - // For O32, a minimum of four words (16 bytes) of argument space is - // allocated. - if (IsO32 && (CallConv != CallingConv::Fast)) - NextStackOffset = std::max(NextStackOffset, (unsigned)16); // Check if it's really possible to do a tail call. 
if (isTailCall) - isTailCall = IsEligibleForTailCallOptimization(CallConv, NextStackOffset); + isTailCall = + IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset, + *MF.getInfo<MipsFunctionInfo>()); if (isTailCall) ++NumTailCalls; @@ -3015,6 +2803,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); if (!isTailCall) @@ -3027,6 +2817,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; + MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -3039,14 +2830,12 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - if (IsO32) - WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, - MFI, DAG, Arg, VA, Flags, getPointerTy(), - Subtarget->isLittle()); - else - PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, - MFI, DAG, Arg, VA, Flags, getPointerTy(), - Subtarget->isLittle()); + assert(ByValArg != MipsCCInfo.byval_end()); + assert(!isTailCall && + "Do not tail-call optimize if there is a byval argument."); + passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, + MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle()); + ++ByValArg; continue; } @@ -3096,10 +2885,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // emit ISD::STORE whichs stores the // parameter value to a stack Location - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - DAG.getIntPtrConstant(VA.getLocMemOffset())); - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), false, false, 0)); + MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), + Chain, Arg, dl, isTailCall, DAG)); } // Transform all store nodes into one single node because all store @@ -3274,70 +3061,6 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue> &OutChains, - SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - const Argument *FuncArg) { - unsigned LocMem = VA.getLocMemOffset(); - unsigned FirstWord = LocMem / 4; - - // copy register A0 - A3 to frame object - for (unsigned i = 0; i < NumWords; ++i) { - unsigned CurWord = FirstWord + i; - if (CurWord >= O32IntRegsSize) - break; - - unsigned SrcReg = O32IntRegs[CurWord]; - unsigned Reg = AddLiveIn(MF, SrcReg, &Mips::CPURegsRegClass); - SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, - DAG.getConstant(i * 4, MVT::i32)); - SDValue Store = 
DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), - StorePtr, MachinePointerInfo(FuncArg, i * 4), - false, false, 0); - OutChains.push_back(Store); - } -} - -// Create frame object on stack and copy registers used for byval passing to it. -static unsigned -CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue> &OutChains, SelectionDAG &DAG, - const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, - MachineFrameInfo *MFI, bool IsRegLoc, - SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, - EVT PtrTy, const Argument *FuncArg) { - const uint16_t *Reg = Mips64IntRegs + 8; - int FOOffset; // Frame object offset from virtual frame pointer. - - if (IsRegLoc) { - Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); - FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8; - } - else - FOOffset = VA.getLocMemOffset(); - - // Create frame object. - unsigned NumRegs = (Flags.getByValSize() + 7) / 8; - unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true); - SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy); - InVals.push_back(FIN); - - // Copy arg registers. - for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs); - ++Reg, ++I) { - unsigned VReg = AddLiveIn(MF, *Reg, &Mips::CPU64RegsRegClass); - SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, - DAG.getConstant(I * 8, PtrTy)); - SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), - StorePtr, MachinePointerInfo(FuncArg, I * 8), - false, false, 0); - OutChains.push_back(Store); - } - - return LastFI; -} - /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments placed on the stack. SDValue @@ -3361,20 +3084,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, isVarArg, IsO32, CCInfo); - if (CallConv == CallingConv::Fast) - CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC); - else if (IsO32) - CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); - else - CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); + MipsCCInfo.analyzeFormalArguments(Ins); + MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), + MipsCCInfo.hasByValArg()); + MipsFI->setIncomingArgSize(CCInfo.getNextStackOffset()); Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); - int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function. 
+ unsigned CurArgIdx = 0; + MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[i].OrigArgIndex; EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; bool IsRegLoc = VA.isRegLoc(); @@ -3382,18 +3107,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (Flags.isByVal()) { assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); - if (IsO32) { - unsigned NumWords = (Flags.getByValSize() + 3) / 4; - LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), - true); - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); - InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags, - &*FuncArg); - } else // N32/64 - LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, - MFI, IsRegLoc, InVals, MipsFI, - getPointerTy(), &*FuncArg); + assert(ByValArg != MipsCCInfo.byval_end()); + copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg, + MipsCCInfo, *ByValArg); + ++ByValArg; continue; } @@ -3455,13 +3172,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); // The stack pointer offset is relative to the caller stack frame. - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(LastFI), + MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); } } @@ -3480,48 +3197,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - if (isVarArg) { - unsigned NumOfRegs = IsO32 ? 4 : 8; - const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; - unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); - int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. - const TargetRegisterClass *RC = IsO32 ? - (const TargetRegisterClass*)&Mips::CPURegsRegClass : - (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - unsigned RegSize = RC->getSize(); - int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; - - // Offset of the first variable argument from stack pointer. - int FirstVaArgOffset; - - if (IsO32 || (Idx == NumOfRegs)) { - FirstVaArgOffset = - (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize; - } else - FirstVaArgOffset = RegSlotOffset; - - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. - LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true); - MipsFI->setVarArgsFrameIndex(LastFI); - - // Copy the integer registers that have not been used for argument passing - // to the argument register save area. For O32, the save area is allocated - // in the caller's stack frame, while for N32/64, it is allocated in the - // callee's stack frame. 
- for (int StackOffset = RegSlotOffset; - Idx < NumOfRegs; ++Idx, StackOffset += RegSize) { - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, - MVT::getIntegerVT(RegSize * 8)); - LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, - MachinePointerInfo(), false, false, 0)); - } - } - - MipsFI->setLastInArgFI(LastFI); + if (isVarArg) + writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions @@ -3872,3 +3549,316 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } + +MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CallConv, bool IsVarArg, + bool IsO32, CCState &Info) : CCInfo(Info) { + UseRegsForByval = true; + + if (IsO32) { + RegSize = 4; + NumIntArgRegs = array_lengthof(O32IntRegs); + ReservedArgArea = 16; + IntArgRegs = ShadowRegs = O32IntRegs; + FixedFn = VarFn = CC_MipsO32; + } else { + RegSize = 8; + NumIntArgRegs = array_lengthof(Mips64IntRegs); + ReservedArgArea = 0; + IntArgRegs = Mips64IntRegs; + ShadowRegs = Mips64DPRegs; + FixedFn = CC_MipsN; + VarFn = CC_MipsN_VarArg; + } + + if (CallConv == CallingConv::Fast) { + assert(!IsVarArg); + UseRegsForByval = false; + ReservedArgArea = 0; + FixedFn = VarFn = CC_Mips_FastCC; + } + + // Pre-allocate reserved argument area. + CCInfo.AllocateStack(ReservedArgArea, 1); +} + +void MipsTargetLowering::MipsCC:: +analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args) { + unsigned NumOpnds = Args.size(); + + for (unsigned I = 0; I != NumOpnds; ++I) { + MVT ArgVT = Args[I].VT; + ISD::ArgFlagsTy ArgFlags = Args[I].Flags; + bool R; + + if (ArgFlags.isByVal()) { + handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags); + continue; + } + + if (Args[I].IsFixed) + R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + + if (R) { +#ifndef NDEBUG + dbgs() << "Call operand #" << I << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +void MipsTargetLowering::MipsCC:: +analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) { + unsigned NumArgs = Args.size(); + + for (unsigned I = 0; I != NumArgs; ++I) { + MVT ArgVT = Args[I].VT; + ISD::ArgFlagsTy ArgFlags = Args[I].Flags; + + if (ArgFlags.isByVal()) { + handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags); + continue; + } + + if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) + continue; + +#ifndef NDEBUG + dbgs() << "Formal Arg #" << I << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } +} + +void +MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, + MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags) { + assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0."); + + struct ByValArgInfo ByVal; + unsigned ByValSize = RoundUpToAlignment(ArgFlags.getByValSize(), RegSize); + unsigned Align = std::min(std::max(ArgFlags.getByValAlign(), RegSize), + RegSize * 2); + + if (UseRegsForByval) + allocateRegs(ByVal, ByValSize, Align); + + // Allocate space on caller's stack. 
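// (Worked example under O32, where RegSize == 4: an 18-byte byval with
//  8-byte alignment rounds up to ByValSize == 20; if $a1 is the first free
//  register, the even-register rule in allocateRegs below skips it, $a2 and
//  $a3 carry the first 8 bytes, and the remaining 12 bytes come from the
//  caller's stack via the AllocateStack call that follows.)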
+ ByVal.Address = CCInfo.AllocateStack(ByValSize - RegSize * ByVal.NumRegs, + Align); + CCInfo.addLoc(CCValAssign::getMem(ValNo, ValVT, ByVal.Address, LocVT, + LocInfo)); + ByValArgs.push_back(ByVal); +} + +void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal, + unsigned ByValSize, + unsigned Align) { + assert(!(ByValSize % RegSize) && !(Align % RegSize) && + "Byval argument's size and alignment should be a multiple of " + "RegSize."); + + ByVal.FirstIdx = CCInfo.getFirstUnallocated(IntArgRegs, NumIntArgRegs); + + // If Align > RegSize, the first arg register must be even. + if ((Align > RegSize) && (ByVal.FirstIdx % 2)) { + CCInfo.AllocateReg(IntArgRegs[ByVal.FirstIdx], ShadowRegs[ByVal.FirstIdx]); + ++ByVal.FirstIdx; + } + + // Mark the registers allocated. + for (unsigned I = ByVal.FirstIdx; ByValSize && (I < NumIntArgRegs); + ByValSize -= RegSize, ++I, ++ByVal.NumRegs) + CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]); +} + +void MipsTargetLowering:: +copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, + SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, + SmallVectorImpl<SDValue> &InVals, const Argument *FuncArg, + const MipsCC &CC, const ByValArgInfo &ByVal) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned RegAreaSize = ByVal.NumRegs * CC.regSize(); + unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); + int FrameObjOffset; + + if (RegAreaSize) + FrameObjOffset = (int)CC.reservedArgArea() - + (int)((CC.numIntArgRegs() - ByVal.FirstIdx) * CC.regSize()); + else + FrameObjOffset = ByVal.Address; + + // Create frame object. + EVT PtrTy = getPointerTy(); + int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + InVals.push_back(FIN); + + if (!ByVal.NumRegs) + return; + + // Copy arg registers. + EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + + for (unsigned I = 0; I < ByVal.NumRegs; ++I) { + unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I]; + unsigned VReg = AddLiveIn(MF, ArgReg, RC); + unsigned Offset = I * CC.regSize(); + SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, + DAG.getConstant(Offset, PtrTy)); + SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), + StorePtr, MachinePointerInfo(FuncArg, Offset), + false, false, 0); + OutChains.push_back(Store); + } +} + +// Copy byval arg to registers and stack. +void MipsTargetLowering:: +passByValArg(SDValue Chain, DebugLoc DL, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const MipsCC &CC, const ByValArgInfo &ByVal, + const ISD::ArgFlagsTy &Flags, bool isLittle) const { + unsigned ByValSize = Flags.getByValSize(); + unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. + unsigned RegSize = CC.regSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), RegSize); + EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8); + + if (ByVal.NumRegs) { + const uint16_t *ArgRegs = CC.intArgRegs(); + bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize); + unsigned I = 0; + + // Copy words to registers. 
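// (For example, a 10-byte byval on O32 starting at $a0: NumRegs == 3 and
//  LeftoverBytes is true, so this loop emits two word loads into $a0 and
//  $a1, and the sub-word path further down assembles the final 2 bytes into
//  $a2 with a zero-extending halfword load and a shift.)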
+ for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, + MachinePointerInfo(), false, false, false, + Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; + RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); + } + + // Return if the struct has been fully copied. + if (ByValSize == Offset) + return; + + // Copy the remainder of the byval argument with sub-word loads and shifts. + if (LeftoverBytes) { + assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) && + "Size of the remainder should be smaller than RegSize."); + SDValue Val; + + for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0; + Offset < ByValSize; LoadSize /= 2) { + unsigned RemSize = ByValSize - Offset; + + if (RemSize < LoadSize) + continue; + + // Load subword. + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = + DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, + MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), + false, false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // Shift the loaded value. Shift amounts are in bits; TotalSizeLoaded + // counts bytes, so scale it the same way as the big-endian case. + unsigned Shamt; + + if (isLittle) + Shamt = TotalSizeLoaded * 8; + else + Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8; + + SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, + DAG.getConstant(Shamt, MVT::i32)); + + if (Val.getNode()) + Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); + else + Val = Shift; + + Offset += LoadSize; + TotalSizeLoaded += LoadSize; + Alignment = std::min(Alignment, LoadSize); + } + + unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; + RegsToPass.push_back(std::make_pair(ArgReg, Val)); + return; + } + } + + // Copy the remainder of the byval arg to the stack with memcpy. + unsigned MemCpySize = ByValSize - Offset; + SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, + DAG.getIntPtrConstant(ByVal.Address)); + Chain = DAG.getMemcpy(Chain, DL, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); +} + +void +MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, + const MipsCC &CC, SDValue Chain, + DebugLoc DL, SelectionDAG &DAG) const { + unsigned NumRegs = CC.numIntArgRegs(); + const uint16_t *ArgRegs = CC.intArgRegs(); + const CCState &CCInfo = CC.getCCInfo(); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs); + unsigned RegSize = CC.regSize(); + EVT RegTy = MVT::getIntegerVT(RegSize * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + // Offset of the first variable argument from stack pointer. + int VaArgOffset; + + if (NumRegs == Idx) + VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize); + else + VaArgOffset = + (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx)); + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. 
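// (For instance, a call to printf on O32 fixes only $a0, so Idx == 1 and
//  VaArgOffset == 16 - 4 * 3 == 4, the home slot of $a1; the loop below then
//  spills $a1-$a3 to offsets 4, 8 and 12 of the caller-allocated save area.)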
+ int FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true); + MipsFI->setVarArgsFrameIndex(FI); + + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) { + unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0); + cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(0); + OutChains.push_back(Store); + } +} diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 18b9db7bc6..ac82347a1f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -17,6 +17,7 @@ #include "Mips.h" #include "MipsSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -140,6 +141,7 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// + class MipsFunctionInfo; class MipsTargetLowering : public TargetLowering { public: @@ -171,6 +173,69 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: + + /// ByValArgInfo - Byval argument information. + struct ByValArgInfo { + unsigned FirstIdx; // Index of the first register used. + unsigned NumRegs; // Number of registers used for this argument. + unsigned Address; // Offset of the stack area used to pass this argument. + + ByValArgInfo() : FirstIdx(0), NumRegs(0), Address(0) {} + }; + + /// MipsCC - This class provides methods used to analyze formal and call + /// arguments and inquire about calling convention information. + class MipsCC { + public: + MipsCC(CallingConv::ID CallConv, bool IsVarArg, bool IsO32, + CCState &Info); + + void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs); + void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); + void handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags); + + const CCState &getCCInfo() const { return CCInfo; } + + /// hasByValArg - Returns true if the function has byval arguments. + bool hasByValArg() const { return !ByValArgs.empty(); } + + /// useRegsForByval - Returns true if the calling convention allows the + /// use of registers to pass byval arguments. + bool useRegsForByval() const { return UseRegsForByval; } + + /// regSize - Size (in bytes) of integer registers. + unsigned regSize() const { return RegSize; } + + /// numIntArgRegs - Number of integer registers available for calls. + unsigned numIntArgRegs() const { return NumIntArgRegs; } + + /// reservedArgArea - The size of the area the caller reserves for + /// register arguments. This is 16 bytes if the ABI is O32. + unsigned reservedArgArea() const { return ReservedArgArea; } + + /// intArgRegs - Pointer to array of integer registers. 
+ const uint16_t *intArgRegs() const { return IntArgRegs; } + + typedef SmallVector<ByValArgInfo, 2>::const_iterator byval_iterator; + byval_iterator byval_begin() const { return ByValArgs.begin(); } + byval_iterator byval_end() const { return ByValArgs.end(); } + + private: + void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, + unsigned Align); + + CCState &CCInfo; + bool UseRegsForByval; + unsigned RegSize; + unsigned NumIntArgRegs; + unsigned ReservedArgArea; + const uint16_t *IntArgRegs, *ShadowRegs; + SmallVector<ByValArgInfo, 2> ByValArgs; + llvm::CCAssignFn *FixedFn, *VarFn; + }; + // Subtarget Info const MipsSubtarget *Subtarget; @@ -210,8 +275,33 @@ namespace llvm { /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC, - unsigned NextStackOffset) const; + bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + /// copyByValRegs - Copy argument registers which were used to pass a byval + /// argument to the stack. Create a stack frame object for the byval + /// argument. + void copyByValRegs(SDValue Chain, DebugLoc DL, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, + const ISD::ArgFlagsTy &Flags, + SmallVectorImpl<SDValue> &InVals, + const Argument *FuncArg, + const MipsCC &CC, const ByValArgInfo &ByVal) const; + + /// passByValArg - Pass a byval argument in registers or on stack. + void passByValArg(SDValue Chain, DebugLoc DL, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const MipsCC &CC, const ByValArgInfo &ByVal, + const ISD::ArgFlagsTy &Flags, bool isLittle) const; + + /// writeVarArgRegs - Write variable function arguments passed in registers + /// to the stack. Also create a stack frame object for the first variable + /// argument. 
+ void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC, + SDValue Chain, DebugLoc DL, SelectionDAG &DAG) const; // @LOCALMOD-BEGIN SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; @@ -225,6 +315,10 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, + SDValue Arg, DebugLoc DL, bool IsTailCall, + SelectionDAG &DAG) const; + virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index fa6faf242d..dcab7cbf00 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -182,20 +182,21 @@ defm CEIL_W : FFR1_W_M<0xe, "ceil">; defm CEIL_L : FFR1_L_M<0xa, "ceil">; defm FLOOR_W : FFR1_W_M<0xf, "floor">; defm FLOOR_L : FFR1_L_M<0xb, "floor">; -defm CVT_W : FFR1_W_M<0x24, "cvt">; +defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects; //defm CVT_L : FFR1_L_M<0x25, "cvt">; -def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; -def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>; -def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>; +def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects; +def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects; +def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects; -let Predicates = [NotFP64bit, HasStandardEncoding] in { +let Predicates = [NotFP64bit, HasStandardEncoding], neverHasSideEffects = 1 in { def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; } -let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in { +let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64", + neverHasSideEffects = 1 in { def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index bab26c3aa9..f6259cc269 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -202,6 +202,14 @@ class IsTailCall { bit isCodeGenOnly = 1; } +class IsAsCheapAsAMove { + bit isAsCheapAsAMove = 1; +} + +class NeverHasSideEffects { + bit neverHasSideEffects = 1; +} + //===----------------------------------------------------------------------===// // Instruction format superclass //===----------------------------------------------------------------------===// @@ -419,7 +427,7 @@ class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm, // Load Upper Immediate class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>: FI<op, (outs RC:$rt), (ins Imm:$imm16), - !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> { + !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove { let rs = 0; let neverHasSideEffects = 1; let isReMaterializable = 1; @@ -958,7 +966,8 @@ def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>; //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>; +def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, 
CPURegs>, + IsAsCheapAsAMove; def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>; def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>; def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index b9dbd522b7..5d9f0cffb7 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -424,8 +424,6 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { // Compute basic block addresses. if (TM.getRelocationModel() == Reloc::PIC_) { - MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); - uint64_t Address = 0; for (I = MBBInfos.begin(); I != E; Address += I->Size, ++I) diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 362173eda3..5ff19aba02 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -43,4 +43,17 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC); } +bool MipsFunctionInfo::mips16SPAliasRegSet() const { + return Mips16SPAliasReg; +} +unsigned MipsFunctionInfo::getMips16SPAliasReg() { + // Return the register if it has already been initialized. + if (Mips16SPAliasReg) + return Mips16SPAliasReg; + + const TargetRegisterClass *RC; + RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + return Mips16SPAliasReg = MF.getRegInfo().createVirtualRegister(RC); +} + void MipsFunctionInfo::anchor() { } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 5b766f22a8..d2eba58114 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -39,38 +39,48 @@ class MipsFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; + /// Mips16SPAliasReg - keeps track of the virtual register initialized for + /// use as an alias for SP in loads/stores of halfwords/bytes from/to the + /// stack. + unsigned Mips16SPAliasReg; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; - // Range of frame object indices. - // InArgFIRange: Range of indices of all frame objects created during call to - // LowerFormalArguments. - std::pair<int, int> InArgFIRange; + // Formal argument information obtained during call to LowerFormalArguments. + unsigned NextStackOffset; + bool HasByvalArg; - bool EmitNOAT; + // Size of incoming argument area. 
+ unsigned IncomingArgSize; public: MipsFunctionInfo(MachineFunction& MF) - : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), - VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), EmitNOAT(false) + : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), + VarArgsFrameIndex(0) {} - bool isInArgFI(int FI) const { - return FI <= InArgFIRange.first && FI >= InArgFIRange.second; - } - void setLastInArgFI(int FI) { InArgFIRange.second = FI; } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } bool globalBaseRegSet() const; unsigned getGlobalBaseReg(); + bool mips16SPAliasRegSet() const; + unsigned getMips16SPAliasReg(); + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } - bool getEmitNOAT() const { return EmitNOAT; } - void setEmitNOAT() { EmitNOAT = true; } + unsigned nextStackOffset() const { return NextStackOffset; } + bool hasByvalArg() const { return HasByvalArg; } + void setFormalArgInfo(unsigned Offset, bool HasByval) { + NextStackOffset = Offset; + HasByvalArg = HasByval; + } + + unsigned getIncomingArgSize() const { return IncomingArgSize; } + void setIncomingArgSize(unsigned S) { IncomingArgSize = S; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index ab32bc4a66..13893a1e31 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -81,13 +81,13 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const uint16_t ReservedCPURegs[] = { - Mips::ZERO, Mips::AT, + Mips::ZERO, Mips::T6, Mips::T7, Mips::T8, // @LOCALMOD: reserved for PNaCl use Mips::K0, Mips::K1, Mips::SP }; static const uint16_t ReservedCPU64Regs[] = { - Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 + Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 }; BitVector Reserved(getNumRegs()); @@ -96,29 +96,28 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) Reserved.set(ReservedCPURegs[I]); - if (Subtarget.hasMips64()) { - for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) - Reserved.set(ReservedCPU64Regs[I]); + for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) + Reserved.set(ReservedCPU64Regs[I]); + if (Subtarget.hasMips64()) { // Reserve all registers in AFGR64. for (RegIter Reg = Mips::AFGR64RegClass.begin(), EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); } else { - // Reserve all registers in CPU64Regs & FGR64. - for (RegIter Reg = Mips::CPU64RegsRegClass.begin(), - EReg = Mips::CPU64RegsRegClass.end(); Reg != EReg; ++Reg) - Reserved.set(*Reg); - + // Reserve all registers in FGR64. for (RegIter Reg = Mips::FGR64RegClass.begin(), EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); } - // Reserve FP if this function should have a dedicated frame pointer register. if (MF.getTarget().getFrameLowering()->hasFP(MF)) { - Reserved.set(Mips::FP); - Reserved.set(Mips::FP_64); + if (Subtarget.inMips16Mode()) + Reserved.set(Mips::S0); + else { + Reserved.set(Mips::FP); + Reserved.set(Mips::FP_64); + } } // Reserve hardware registers. 
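The reserved-register changes above pair with the frame-register selection reworked in the next hunk: mips16's 16-bit encodings cannot address $fp directly, so $s0 stands in for it, which is also why getReservedRegs now reserves S0 in mips16 mode whenever a dedicated frame pointer is required. A minimal standalone sketch of that selection (illustrative names and enum values, not the patch's types):

#include <cstdio>

enum MipsReg { SP = 29, FP = 30, S0 = 16 }; // MIPS register numbers

static MipsReg frameRegister(bool hasFP, bool inMips16Mode) {
  if (inMips16Mode)
    return hasFP ? S0 : SP; // $s0 doubles as the frame pointer in mips16
  return hasFP ? FP : SP;
}

int main() {
  // prints "16 30": S0 for mips16 with a frame pointer, FP otherwise
  std::printf("%d %d\n", frameRegister(true, true), frameRegister(true, false));
}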
@@ -188,8 +187,12 @@ getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); bool IsN64 = Subtarget.isABI_N64(); - return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : - (IsN64 ? Mips::SP_64 : Mips::SP); + if (Subtarget.inMips16Mode()) + return TFI->hasFP(MF) ? Mips::S0 : Mips::SP; + else + return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : + (IsN64 ? Mips::SP_64 : Mips::SP); + } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index a72e3b857f..391c19e07e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -73,7 +73,7 @@ class HWR<bits<5> num, string n> : MipsReg<n> { let Namespace = "Mips" in { // General Purpose Registers def ZERO : MipsGPRReg< 0, "zero">, DwarfRegNum<[0]>; - def AT : MipsGPRReg< 1, "at">, DwarfRegNum<[1]>; + def AT : MipsGPRReg< 1, "1">, DwarfRegNum<[1]>; def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>; def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>; def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>; @@ -107,7 +107,7 @@ let Namespace = "Mips" in { // General Purpose 64-bit Registers def ZERO_64 : Mips64GPRReg< 0, "zero", [ZERO]>, DwarfRegNum<[0]>; - def AT_64 : Mips64GPRReg< 1, "at", [AT]>, DwarfRegNum<[1]>; + def AT_64 : Mips64GPRReg< 1, "1", [AT]>, DwarfRegNum<[1]>; def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>; def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>; def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 16a6757d66..03f5176b29 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -202,6 +203,19 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) MRI.setPhysRegUsed(FP); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + + estimateStackSize(MF); + + if (isInt<16>(MaxSPOffset)) + return; + + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + RS->setScavengingFrameIndex(FI); } const MipsFrameLowering * diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index e4b44efd81..fb0f9df038 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -260,9 +260,8 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. 
- MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0); - BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill); } } @@ -274,10 +273,12 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, unsigned *NewImm) const { MipsAnalyzeImmediate AnalyzeImm; const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); unsigned Size = STI.isABI_N64() ? 64 : 32; unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + const TargetRegisterClass *RC = STI.isABI_N64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; bool LastInstrIsADDiu = NewImm; const MipsAnalyzeImmediate::InstSeq &Seq = @@ -289,22 +290,23 @@ MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. + unsigned Reg = RegInfo.createVirtualRegister(RC); + if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); + BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd)); else - BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ZEROReg) + BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(ZEROReg) .addImm(SignExtend64<16>(Inst->ImmOpnd)); // Build the remaining instructions in Seq. for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) - BuildMI(MBB, II, DL, get(Inst->Opc), ATReg).addReg(ATReg) + BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(Reg, RegState::Kill) .addImm(SignExtend64<16>(Inst->ImmOpnd)); if (LastInstrIsADDiu) *NewImm = Inst->ImmOpnd; - return ATReg; + return Reg; } unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 8e2c2c5174..56b9ba95e5 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,6 +44,16 @@ MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, const MipsSEInstrInfo &I) : MipsRegisterInfo(ST), TII(I) {} +bool MipsSERegisterInfo:: +requiresRegisterScavenging(const MachineFunction &MF) const { + return true; +} + +bool MipsSERegisterInfo:: +requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; +} + // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsSERegisterInfo:: @@ -72,7 +83,6 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -103,6 +113,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // - If the frame object is any of the following, its offset must be adjusted // by 
adding the size of the stack: // incoming argument, callee-saved register location or local variable. + bool IsKill = false; int64_t Offset; Offset = SPOffset + (int64_t)StackSize; @@ -116,17 +127,17 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; unsigned NewImm; - MipsFI->setEmitNOAT(); unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(Reg); + BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg) + .addReg(Reg, RegState::Kill); - FrameReg = ATReg; + FrameReg = Reg; Offset = SignExtend64<16>(NewImm); + IsKill = true; } - MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); } diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index b4eab65522..7437bd36c3 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -27,6 +27,10 @@ public: MipsSERegisterInfo(const MipsSubtarget &Subtarget, const MipsSEInstrInfo &TII); + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index f610253f49..0ed3277306 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -53,7 +53,7 @@ MipsTargetMachine(const Target &T, StringRef TT, InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(*this), TSInfo(*this), JITInfo(), - ELFWriterInfo(false, isLittle), STTI(&TLInfo), VTTI(&TLInfo) { + STTI(&TLInfo), VTTI(&TLInfo) { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index a62db327e5..b54f5cee6d 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,7 +20,6 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" -#include "MipsELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" @@ -38,7 +37,6 @@ class MipsTargetMachine : public LLVMTargetMachine { MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; - MipsELFWriterInfo ELFWriterInfo; ScalarTargetTransformImpl STTI; VectorTargetTransformImpl VTTI; @@ -74,9 +72,6 @@ public: return &TSInfo; } - virtual const MipsELFWriterInfo *getELFWriterInfo() const { - return &ELFWriterInfo; - } virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { return &STTI; } diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 971d1b89a8..d3dfb35e26 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -126,9 +126,8 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return Base; // Truncate/sext the offset to the pointer size. 
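// (Illustration: with 32-bit pointers SExtAmount is 32, so the shift pair
//  below computes (Offset << 32) >> 32 on a 64-bit value; the arithmetic
//  right shift replicates bit 31 and thus sign-extends the low 32 bits.)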
- unsigned PtrSize = TD.getPointerTypeSizeInBits(PtrVal->getType()); - if (PtrSize != 64) { - int SExtAmount = 64-PtrSize; + if (TD.getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD.getPointerSizeInBits(); Offset = (Offset << SExtAmount) >> SExtAmount; } @@ -150,7 +149,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CE->getType()), + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), false/*ZExt*/); return LowerConstant(Op, AP); } @@ -1379,7 +1378,7 @@ getOpenCLAlignment(const DataLayout *TD, const FunctionType *FTy = dyn_cast<FunctionType>(Ty); if (FTy) - return TD->getPointerPrefAlignment(0); + return TD->getPointerPrefAlignment(); return TD->getPrefTypeAlignment(Ty); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6941413ed4..72395bb283 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -443,7 +443,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { bool PPCLinuxAsmPrinter::doFinalization(Module &M) { const DataLayout *TD = TM.getDataLayout(); - bool isPPC64 = TD->getPointerSizeInBits(0) == 64; + bool isPPC64 = TD->getPointerSizeInBits() == 64; if (isPPC64 && !TOC.empty()) { const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", @@ -549,7 +549,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); @@ -644,7 +644,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64; + bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. const TargetLoweringObjectFileMachO &TLOFMacho = diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6195441cfc..254fea67fc 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -623,6 +623,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { } } +// getVCmpInst: return the vector compare instruction for the specified +// vector type and condition code. Since this is AltiVec-specific code, +// only the AltiVec types (v16i8, v8i16, v4i32, and v4f32) are supported. 
+static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + case ISD::SETNE: + case ISD::SETUNE: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + // v4f32 != v4f32 can be translated to an unordered not-equal + else if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETLT: + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETGE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + else if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETULT: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETOLT: + case ISD::SETOGT: + case ISD::SETOLE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGEFP; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); +} + +// getVCmpEQInst: return the equal compare instruction for the specified vector +// type. Since this is AltiVec-specific code, only the AltiVec +// types (v16i8, v8i16, v4i32, and v4f32) are supported. +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { + switch (VecVT) { + case MVT::v16i8: + return PPC::VCMPEQUB; + case MVT::v8i16: + return PPC::VCMPEQUH; + case MVT::v4i32: + return PPC::VCMPEQUW; + case MVT::v4f32: + return PPC::VCMPEQFP; + default: + llvm_unreachable("Invalid integer vector compare condition"); + } +} + + SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; @@ -706,20 +788,58 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default + // Altivec Vector compare instructions do not set any CR register by default and + // vector compare operations return the same type as the operands. 
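A scalar model of the per-lane semantics may make the composition in the switch below easier to follow; this is an illustrative sketch, not the SelectionDAG code:

#include <cstdint>
#include <cstdio>

// One lane of an AltiVec compare: all-ones when true, all-zeros when false.
static uint32_t vcmpgt(int32_t a, int32_t b) { return a > b ? ~0u : 0u; }
static uint32_t vcmpeq(int32_t a, int32_t b) { return a == b ? ~0u : 0u; }

// setge = gt OR eq (there is no integer vcmpge); setne = NOR(eq, eq), since
// VNOR with identical operands acts as a bitwise NOT; setlt = gt with the
// operands swapped.
static uint32_t setge(int32_t a, int32_t b) { return vcmpgt(a, b) | vcmpeq(a, b); }
static uint32_t setne(int32_t a, int32_t b) { return ~(vcmpeq(a, b) | vcmpeq(a, b)); }
static uint32_t setlt(int32_t a, int32_t b) { return vcmpgt(b, a); }

int main() {
  // prints "ffffffff 0 ffffffff"
  std::printf("%x %x %x\n", setge(3, 3), setne(3, 3), setlt(2, 3));
}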
if (LHS.getValueType().isVector()) { - unsigned int Opc; - if (LHS.getValueType() == MVT::v16i8) - Opc = PPC::VCMPEQUB; - else if (LHS.getValueType() == MVT::v4i32) - Opc = PPC::VCMPEQUW; - else if (LHS.getValueType() == MVT::v8i16) - Opc = PPC::VCMPEQUH; - else if (LHS.getValueType() == MVT::v4f32) - Opc = PPC::VCMPEQFP; - else - llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); - return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS); + EVT VecVT = LHS.getValueType(); + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; + unsigned int VCmpInst = getVCmpInst(VT, CC); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + } + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: { + // Small optimization: Altivec provides a 'Vector Compare Greater Than + // or Equal To' instruction (vcmpgefp), so in this case there is no + // need for extra logic for the equal compare. + if (VecVT.getSimpleVT().isFloatingPoint()) { + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + } else { + SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + } + } + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: { + SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + } + default: + llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + } } bool Inv; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index de0d66124b..0922011d97 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -361,6 +361,22 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + + for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { + MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j; + setTruncStoreAction(VT, InnerVT, Expand); + } + setLoadExtAction(ISD::SEXTLOAD, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, Expand); + } + + for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + setOperationAction(ISD::FSQRT, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -396,6 +412,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) 
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + // Altivec does not contain unordered floating-point compare instructions + setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { @@ -1498,10 +1522,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - unsigned AS = 0; Type *IntPtrTy = DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext(), AS); + *DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2077,6 +2100,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; + // Empty aggregate parameters do not take up registers. Examples: + // struct { } a; + // union { } b; + // int c[0]; + // etc. However, we have to provide a place-holder in InVals, so + // pretend we have an 8-byte item at the current address for that + // purpose. + if (!ObjSize) { + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + InVals.push_back(FIN); + continue; + } // All aggregates smaller than 8 bytes must be passed right-justified. if (ObjSize < PtrByteSize) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); @@ -3641,6 +3677,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // These are the proper values we need for right-justifying the // aggregate in a parameter register. unsigned Size = Flags.getByValSize(); + + // An empty aggregate parameter takes up no storage and no + // registers. + if (Size == 0) + continue; + // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -3751,7 +3793,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + // A single float or an aggregate containing only a single float + // must be passed right-justified in the stack doubleword, and + // in the GPR, if one is available. 
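// (Example: a float vararg whose slot begins at stack offset 48 is stored
//  at 48 + 4, so on big-endian PPC64 the 4-byte value occupies the low-order
//  half of its 8-byte doubleword, which is where va_arg expects to find it.)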
+ SDValue StoreOff; + if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) { + SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); + StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); + } else + StoreOff = PtrOff; + + SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Store); @@ -6449,9 +6501,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); - if (VT == MVT::f64) + if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); break; case 'v': diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 5a78e8ac6b..6c2249a11b 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -494,16 +494,16 @@ def RLDIMI : MDForm_1<30, 3, // Rotate instructions. def RLDCL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MB), - "rldcl $rA, $rS, $rB, $MB", IntRotateD, + (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE), + "rldcl $rA, $rS, $rB, $MBE", IntRotateD, []>, isPPC64; def RLDICL : MDForm_1<30, 0, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MB), - "rldicl $rA, $rS, $SH, $MB", IntRotateDI, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLDICR : MDForm_1<30, 1, - (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), - "rldicr $rA, $rS, $SH, $ME", IntRotateDI, + (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr $rA, $rS, $SH, $MBE", IntRotateDI, []>, isPPC64; def RLWINM8 : MForm_2<21, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index d123211473..459c3589d3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -498,7 +498,7 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, } else if (CRSpillFrameIdx) { FrameIdx = CRSpillFrameIdx; } else { - MachineFrameInfo *MFI = (const_cast<MachineFunction &>(MF)).getFrameInfo(); + MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo(); FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); CRSpillFrameIdx = FrameIdx; } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index c20995afe8..b9e22f43c3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -33,34 +33,34 @@ namespace PPC { enum { DIR_NONE, DIR_32, - DIR_440, - DIR_601, - DIR_602, - DIR_603, + DIR_440, + DIR_601, + DIR_602, + DIR_603, DIR_7400, - DIR_750, - DIR_970, + DIR_750, + DIR_970, DIR_A2, DIR_E500mc, DIR_E5500, DIR_PWR6, DIR_PWR7, - DIR_64 + DIR_64 }; } class GlobalValue; class TargetMachine; - + class PPCSubtarget : public PPCGenSubtargetInfo { protected: /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; - + /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; - + /// Which cpu directive was used. unsigned DarwinDirective; @@ -76,7 +76,7 @@ protected: bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; - + /// TargetTriple - What processor and OS we're targeting. 
Triple TargetTriple; @@ -86,11 +86,11 @@ public: /// PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit); - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - + /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. void SetJITMode(); @@ -99,12 +99,12 @@ public: /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return StackAlignment; } - + /// getDarwinDirective - Returns the -m directive specified for the cpu. /// unsigned getDarwinDirective() const { return DarwinDirective; } - - /// getInstrItins - Return the instruction itineraies based on subtarget + + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } @@ -113,6 +113,13 @@ public: const char *getDataLayoutString() const { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). + if (isPPC64() && isSVR4ABI()) { + if (TargetTriple.getOS() == llvm::Triple::FreeBSD) + return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64"; + else + return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; + } + return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; } @@ -120,22 +127,22 @@ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. /// bool isPPC64() const { return IsPPC64; } - + /// has64BitSupport - Return true if the selected CPU supports 64-bit /// instructions, regardless of whether we are in 32-bit or 64-bit mode. bool has64BitSupport() const { return Has64BitSupport; } - + /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit /// registers in 32-bit mode when possible. This can only be true if /// has64BitSupport() returns true. bool use64BitRegs() const { return Use64BitRegs; } - + /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. 
- bool hasLazyResolverStub(const GlobalValue *GV, + bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; - + // isJITCodeModel - True if we're generating code for the JIT bool isJITCodeModel() const { return IsJITCodeModel; } diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 7d3dd8f015..393178a469 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -64,7 +64,7 @@ unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) { } LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { - return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), 0)); + return wrap(unwrap(TD)->getIntPtrType(getGlobalContext())); } LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) { diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp deleted file mode 100644 index 66da391eca..0000000000 --- a/lib/Target/TargetELFWriterInfo.cpp +++ /dev/null @@ -1,25 +0,0 @@ -//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info --0-*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the TargetELFWriterInfo class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Function.h" -#include "llvm/Target/TargetELFWriterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) : - is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) { -} - -TargetELFWriterInfo::~TargetELFWriterInfo() {} - diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp index 382eecb766..4cd07cd6c3 100644 --- a/lib/Target/TargetTransformImpl.cpp +++ b/lib/Target/TargetTransformImpl.cpp @@ -28,7 +28,7 @@ bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const { } bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { + Type *Ty) const { return TLI->isLegalAddressingMode(AM, Ty); } @@ -49,83 +49,90 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const { return TLI->getJumpBufSize(); } +bool ScalarTargetTransformImpl::shouldBuildLookupTables() const { + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. // //===----------------------------------------------------------------------===// -int InstructionOpcodeToISD(unsigned Opcode) { - static const int OpToISDTbl[] = { - /*Instruction::Ret */ 0, // Opcode numbering start at #1. 
- /*Instruction::Br */ 0, - /*Instruction::Switch */ 0, - /*Instruction::IndirectBr */ 0, - /*Instruction::Invoke */ 0, - /*Instruction::Resume */ 0, - /*Instruction::Unreachable */ 0, - /*Instruction::Add */ ISD::ADD, - /*Instruction::FAdd */ ISD::FADD, - /*Instruction::Sub */ ISD::SUB, - /*Instruction::FSub */ ISD::FSUB, - /*Instruction::Mul */ ISD::MUL, - /*Instruction::FMul */ ISD::FMUL, - /*Instruction::UDiv */ ISD::UDIV, - /*Instruction::SDiv */ ISD::UDIV, - /*Instruction::FDiv */ ISD::FDIV, - /*Instruction::URem */ ISD::UREM, - /*Instruction::SRem */ ISD::SREM, - /*Instruction::FRem */ ISD::FREM, - /*Instruction::Shl */ ISD::SHL, - /*Instruction::LShr */ ISD::SRL, - /*Instruction::AShr */ ISD::SRA, - /*Instruction::And */ ISD::AND, - /*Instruction::Or */ ISD::OR, - /*Instruction::Xor */ ISD::XOR, - /*Instruction::Alloca */ 0, - /*Instruction::Load */ ISD::LOAD, - /*Instruction::Store */ ISD::STORE, - /*Instruction::GetElementPtr */ 0, - /*Instruction::Fence */ 0, - /*Instruction::AtomicCmpXchg */ 0, - /*Instruction::AtomicRMW */ 0, - /*Instruction::Trunc */ ISD::TRUNCATE, - /*Instruction::ZExt */ ISD::ZERO_EXTEND, - /*Instruction::SExt */ ISD::SEXTLOAD, - /*Instruction::FPToUI */ ISD::FP_TO_UINT, - /*Instruction::FPToSI */ ISD::FP_TO_SINT, - /*Instruction::UIToFP */ ISD::UINT_TO_FP, - /*Instruction::SIToFP */ ISD::SINT_TO_FP, - /*Instruction::FPTrunc */ ISD::FP_ROUND, - /*Instruction::FPExt */ ISD::FP_EXTEND, - /*Instruction::PtrToInt */ ISD::BITCAST, - /*Instruction::IntToPtr */ ISD::BITCAST, - /*Instruction::BitCast */ ISD::BITCAST, - /*Instruction::ICmp */ ISD::SETCC, - /*Instruction::FCmp */ ISD::SETCC, - /*Instruction::PHI */ 0, - /*Instruction::Call */ 0, - /*Instruction::Select */ ISD::SELECT, - /*Instruction::UserOp1 */ 0, - /*Instruction::UserOp2 */ 0, - /*Instruction::VAArg */ 0, - /*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT, - /*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT, - /*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE, - /*Instruction::ExtractValue */ ISD::MERGE_VALUES, - /*Instruction::InsertValue */ ISD::MERGE_VALUES, - /*Instruction::LandingPad */ 0}; - - assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) && - "Instruction order had changed"); - - // Opcode numbering starts at #1 but the table starts at #0, so we subtract - // one from the opcode number. 
- return OpToISDTbl[Opcode - 1]; -} - -std::pair<unsigned, EVT> +static int InstructionOpcodeToISD(unsigned Opcode) { + enum InstructionOpcodes { +#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, +#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM +#include "llvm/Instruction.def" + }; + switch (static_cast<InstructionOpcodes>(Opcode)) { + case Ret: return 0; + case Br: return 0; + case Switch: return 0; + case IndirectBr: return 0; + case Invoke: return 0; + case Resume: return 0; + case Unreachable: return 0; + case Add: return ISD::ADD; + case FAdd: return ISD::FADD; + case Sub: return ISD::SUB; + case FSub: return ISD::FSUB; + case Mul: return ISD::MUL; + case FMul: return ISD::FMUL; + case UDiv: return ISD::UDIV; + case SDiv: return ISD::UDIV; + case FDiv: return ISD::FDIV; + case URem: return ISD::UREM; + case SRem: return ISD::SREM; + case FRem: return ISD::FREM; + case Shl: return ISD::SHL; + case LShr: return ISD::SRL; + case AShr: return ISD::SRA; + case And: return ISD::AND; + case Or: return ISD::OR; + case Xor: return ISD::XOR; + case Alloca: return 0; + case Load: return ISD::LOAD; + case Store: return ISD::STORE; + case GetElementPtr: return 0; + case Fence: return 0; + case AtomicCmpXchg: return 0; + case AtomicRMW: return 0; + case Trunc: return ISD::TRUNCATE; + case ZExt: return ISD::ZERO_EXTEND; + case SExt: return ISD::SEXTLOAD; + case FPToUI: return ISD::FP_TO_UINT; + case FPToSI: return ISD::FP_TO_SINT; + case UIToFP: return ISD::UINT_TO_FP; + case SIToFP: return ISD::SINT_TO_FP; + case FPTrunc: return ISD::FP_ROUND; + case FPExt: return ISD::FP_EXTEND; + case PtrToInt: return ISD::BITCAST; + case IntToPtr: return ISD::BITCAST; + case BitCast: return ISD::BITCAST; + case ICmp: return ISD::SETCC; + case FCmp: return ISD::SETCC; + case PHI: return 0; + case Call: return 0; + case Select: return ISD::SELECT; + case UserOp1: return 0; + case UserOp2: return 0; + case VAArg: return 0; + case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; + case InsertElement: return ISD::INSERT_VECTOR_ELT; + case ShuffleVector: return ISD::VECTOR_SHUFFLE; + case ExtractValue: return ISD::MERGE_VALUES; + case InsertValue: return ISD::MERGE_VALUES; + case LandingPad: return 0; + } + + llvm_unreachable("Unknown instruction type encountered!"); +} + +std::pair<unsigned, EVT> VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, - EVT Ty) const { + EVT Ty) const { unsigned Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. 
After splitting @@ -134,7 +141,7 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty); if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, LK.second); + return std::make_pair(Cost, Ty); if (LK.first == TargetLowering::TypeSplitVector) Cost *= 2; @@ -145,44 +152,173 @@ VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C, } unsigned -VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, - Type *Ty2) const { +VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty, + bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, + Type *Ty) const { // Check if any of the operands are vector operands. int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(Ty2 && "Ty2 must hold the select type"); - if (Ty2->isVectorTy()) - ISD = ISD::VSELECT; + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(Ty->getContext(), TLI->getValueType(Ty)); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Else, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Ty, true, true) + Num * Cost; + } + + // We don't know anything about this scalar instruction. + return 1; +} + +unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair<unsigned, EVT> SrcLT = + getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + + std::pair<unsigned, EVT> DstLT = + getTypeLegalizationCost(Dst->getContext(), TLI->getValueType(Dst)); + + // Handle scalar conversions. + if (!Src->isVectorTy() && !Dst->isVectorTy()) { + + // Scalar bitcasts and truncs are usually free. + if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + return 0; + + // Just check the op cost. If the operation is legal then assume it costs 1. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return 1; + + // Assume that illegal scalar instruction are expensive. + return 4; } - // If we don't have any information about this instruction assume it costs 1. - if (ISD == 0) - return 1; + // Check vector-to-vector casts. + if (Dst->isVectorTy() && Src->isVectorTy()) { + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - assert(Ty1 && "We need to have at least one type"); + // Bitcast between types that are legalized to the same type are free. 
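The cost functions above all share one fallback: if the type does not legalize to something the target supports, the operation is priced as full scalarization. A toy model of that arithmetic, assuming the flat per-element insert/extract cost of 1 that the default getVectorInstrCost above returns:

#include <cstdio>

unsigned vectorInstrCost() { return 1; } // per element insert or extract

unsigned scalarizationOverhead(unsigned numElts, bool insert, bool extract) {
  unsigned cost = 0;
  for (unsigned i = 0; i < numElts; ++i) {
    if (insert)  cost += vectorInstrCost();
    if (extract) cost += vectorInstrCost();
  }
  return cost;
}

unsigned arithmeticInstrCost(unsigned numElts, bool legalAfterSplit,
                             unsigned splitFactor, unsigned scalarCost) {
  if (legalAfterSplit)
    return splitFactor * 1; // legal op times type-legalization overhead
  return scalarizationOverhead(numElts, /*insert=*/true, /*extract=*/true)
         + numElts * scalarCost;
}

int main() {
  // An illegal 4-wide op: 4 inserts + 4 extracts + 4 scalar ops = 12.
  std::printf("%u\n", arithmeticInstrCost(4, false, 1, 1));
}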
+ if (Opcode == Instruction::BitCast) + return 0; + + // Just check the op cost. If the operation is legal then assume it costs + // 1 and multiply by the type-legalization overhead. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return SrcLT.first * 1; + } + + // If we are converting vectors and the operation is illegal, or + // if the vectors are legalized to different types, estimate the + // scalarization costs. + unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(), + Src->getScalarType()); + + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } - // From this stage we look at the legalized type. - std::pair<unsigned, EVT> LT = - getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1)); + // We already handled vector-to-vector and scalar-to-scalar conversions. This + // is where we handle bitcast between vectors and scalars. We need to assume + // that the conversion is scalarized in one way or another. + if (Opcode == Instruction::BitCast) + // Illegal bitcasts are done by storing and loading from a stack slot. + return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + + (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); - if (TLI->isOperationLegalOrCustom(ISD, LT.second)) { + llvm_unreachable("Unhandled cast"); + } + +unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const { + return 1; +} + +unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, + Type *ValTy, + Type *CondTy) const { + int ISD = InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; + } + + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(ValTy->getContext(), TLI->getValueType(ValTy)); + + if (!TLI->isOperationExpand(ISD, LT.second)) { // The operation is legal. Assume it costs 1. Multiply // by the type-legalization overhead. return LT.first * 1; } - unsigned NumElem = - (LT.second.isVector() ? LT.second.getVectorNumElements() : 1); + // Otherwise, assume that the cast is scalarized. + if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} - // We will probably scalarize this instruction. Assume that the cost is the - // number of the vector elements. - return LT.first * NumElem * 1; +/// Returns the expected cost of Vector Insert and Extract. +unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, + Type *Val, + unsigned Index) const { + return 1; } unsigned -VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { +VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, + Type *Ty2) const { return 1; } @@ -190,9 +326,16 @@ unsigned VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - // From this stage we look at the legalized type. 
- std::pair<unsigned, EVT> LT = + std::pair<unsigned, EVT> LT = getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src)); + // Assume that all loads of legal types cost 1. return LT.first; } + +unsigned +VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { + std::pair<unsigned, EVT> LT = + getTypeLegalizationCost(Tp->getContext(), TLI->getValueType(Tp)); + return LT.first; +} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 708951126f..ce446e7573 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -35,6 +35,7 @@ struct X86Operand; class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + ParseInstructionInfo *InstInfo; private: MCAsmParser &getParser() const { return Parser; } @@ -56,11 +57,13 @@ private: X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); + X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - const MCExpr *ParseIntelDotOperator(const MCExpr *Disp); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, + SmallString<64> &Err); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -100,14 +103,15 @@ private: public: X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -177,6 +181,7 @@ struct X86Operand : public MCParsedAsmOperand { struct { const MCExpr *Val; + bool NeedAsmRewrite; } Imm; struct { @@ -225,6 +230,11 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } + bool needAsmRewrite() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.NeedAsmRewrite; + } + const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -468,9 +478,11 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, + bool NeedRewrite = true){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; + Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } @@ -683,6 +695,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Start, "Expected ']' token!"); Parser.Lex(); + End = Tok.getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { @@ -692,6 +705,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' Parser.Lex(); + End = Tok.getLoc(); const MCExpr *Disp = 
MCConstantExpr::Create(Val, getContext()); if (SegReg) return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, @@ -708,9 +722,20 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(Loc, "Unexpected token"); } - if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { - bool isPlus = getLexer().is(AsmToken::Plus); + // Parse ][ as a plus. + bool ExpectRBrac = true; + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; Parser.Lex(); + End = Tok.getLoc(); + } + + if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + bool isPlus = getLexer().is(AsmToken::Plus) || + getLexer().is(AsmToken::LBrac); + Parser.Lex(); SMLoc PlusLoc = Tok.getLoc(); if (getLexer().is(AsmToken::Integer)) { int64_t Val = Tok.getIntVal(); @@ -734,18 +759,40 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, else if (getParser().ParseExpression(Disp, End)) return 0; } } + + // Parse ][ as a plus. + if (getLexer().is(AsmToken::RBrac)) { + ExpectRBrac = false; + Parser.Lex(); + End = Tok.getLoc(); + if (getLexer().is(AsmToken::LBrac)) { + ExpectRBrac = true; + Parser.Lex(); + if (getParser().ParseExpression(Disp, End)) + return 0; + } + } else if (ExpectRBrac) { + if (getParser().ParseExpression(Disp, End)) + return 0; + } - if (getLexer().isNot(AsmToken::RBrac)) - if (getParser().ParseExpression(Disp, End)) return 0; - - End = Tok.getLoc(); - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(End, "expected ']' token!"); - Parser.Lex(); - End = Tok.getLoc(); + if (ExpectRBrac) { + if (getLexer().isNot(AsmToken::RBrac)) + return ErrorOperand(End, "expected ']' token!"); + Parser.Lex(); + End = Tok.getLoc(); + } - if (Tok.getString().startswith(".")) - Disp = ParseIntelDotOperator(Disp); + // Parse the dot operator (e.g., [ebx].foo.bar). + if (Tok.getString().startswith(".")) { + SmallString<64> Err; + const MCExpr *NewDisp; + if (ParseIntelDotOperator(Disp, &NewDisp, Err)) + return ErrorOperand(Tok.getLoc(), Err); + + Parser.Lex(); // Eat the field. + Disp = NewDisp; + } End = Tok.getLoc(); @@ -809,30 +856,56 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } /// Parse the '.' operator. -const MCExpr *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp) { +bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, + const MCExpr **NewDisp, + SmallString<64> &Err) { AsmToken Tok = *&Parser.getTok(); + uint64_t OrigDispVal, DotDispVal; + + // FIXME: Handle non-constant expressions. + if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { + OrigDispVal = OrigDisp->getValue(); + } else { + Err = "Non-constant offsets are not supported!"; + return true; + } // Drop the '.'. StringRef DotDispStr = Tok.getString().drop_front(1); - Lex(); // Eat .field. - // .Imm gets lexed as a real. if (Tok.is(AsmToken::Real)) { APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); - uint64_t DotDispVal = DotDisp.getZExtValue(); - - // Special case zero dot displacement. - if (!DotDispVal) return Disp; - - // FIXME: Handle non-constant expressions. - if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { - uint64_t OrigDispVal = OrigDisp->getValue(); - return MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + DotDispVal = DotDisp.getZExtValue(); + } else if (Tok.is(AsmToken::Identifier)) { + // We should only see an identifier when parsing the original inline asm. 
+ // The front-end should rewrite this in terms of immediates. + assert (isParsingInlineAsm() && "Unexpected field name!"); + + unsigned DotDisp; + std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); + if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, + DotDisp)) { + Err = "Unable to lookup field reference!"; + return true; } + DotDispVal = DotDisp; + } else { + Err = "Unexpected token type!"; + return true; } - return Disp; + + if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { + SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); + unsigned Len = DotDispStr.size(); + unsigned Val = OrigDispVal + DotDispVal; + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, + Val)); + } + + *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + return false; } /// Parse the 'offset' operator. This operator is used to specify the @@ -843,13 +916,16 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { Start = Parser.getTok().getLoc(); assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - SMLoc End; + SMLoc End; const MCExpr *Val; if (getParser().ParseExpression(Val, End)) - return 0; + return ErrorOperand(Start, "Unable to parse expression!"); End = Parser.getTok().getLoc(); + // Don't emit the offset operator. + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); + // The offset operator will have an 'r' constraint, thus we need to create // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. @@ -857,15 +933,63 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); } +/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or +/// C++ type or variable. If the variable is an array, TYPE returns the size of +/// a single element of the array. +X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { + SMLoc TypeLoc = Start; + Parser.Lex(); // Eat offset. + Start = Parser.getTok().getLoc(); + assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); + + SMLoc End; + const MCExpr *Val; + if (getParser().ParseExpression(Val, End)) + return 0; + + End = Parser.getTok().getLoc(); + + unsigned Size = 0; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) + return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); + + Size /= 8; // Size is in terms of bits, but we want bytes in the context. + } + + // Rewrite the type operator and the C or C++ type or variable in terms of an + // immediate. E.g. TYPE foo -> $$4 + unsigned Len = End.getPointer() - TypeLoc.getPointer(); + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); + + const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); + return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); +} + X86Operand *X86AsmParser::ParseIntelOperand() { SMLoc Start = Parser.getTok().getLoc(), End; // offset operator. 
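The TYPE operator above turns a C or C++ type or variable name into an immediate equal to its size in bytes (the Sema lookup reports bits, hence the divide by 8), then records an AsmRewrite so the front end can splice the constant into the asm string. A rough model of the rewrite record, with a hypothetical struct standing in for the real AsmRewrite:

#include <cstddef>
#include <vector>

// Hypothetical stand-in for the rewrite records the parser above emits.
enum class RewriteKind { Skip, Imm, DotOperator };

struct Rewrite {
  RewriteKind kind;
  size_t loc;     // offset of the rewritten text in the asm string
  size_t len;     // how many characters it covers
  unsigned value; // e.g. sizeof(type) in bytes for RewriteKind::Imm
};

// "TYPE foo" -> record an immediate rewrite covering the whole expression.
void rewriteTypeOperator(std::vector<Rewrite> &rewrites, size_t loc,
                         size_t len, unsigned sizeInBits) {
  rewrites.push_back({RewriteKind::Imm, loc, len, sizeInBits / 8});
}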
- const AsmToken &Tok = Parser.getTok(); - if ((Tok.getString() == "offset" || Tok.getString() == "OFFSET") && + StringRef AsmTokStr = Parser.getTok().getString(); + if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && isParsingInlineAsm()) return ParseIntelOffsetOfOperator(Start); + // Type directive. + if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && + isParsingInlineAsm()) + return ParseIntelTypeOperator(Start); + + // Unsupported directives. + if (isParsingIntelSyntax() && + (AsmTokStr == "size" || AsmTokStr == "SIZE" || + AsmTokStr == "length" || AsmTokStr == "LENGTH")) + return ErrorOperand(Start, "Unsupported directive!"); + // immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { @@ -1087,8 +1211,9 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } bool X86AsmParser:: -ParseInstruction(StringRef Name, SMLoc NameLoc, +ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + InstInfo = &Info; StringRef PatchedName = Name; // FIXME: Hack to recognize setneb as setne. diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 45fd42f205..1b2ffb01ad 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -17,7 +17,6 @@ set(sources X86AsmPrinter.cpp X86COFFMachineModuleInfo.cpp X86CodeEmitter.cpp - X86ELFWriterInfo.cpp X86FastISel.cpp X86FloatingPoint.cpp X86FrameLowering.cpp diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b24f517209..85d8a991dd 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -694,7 +694,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, * @param orig - The instruction that is not 16-bit * @param equiv - The instruction that is 16-bit */ -static BOOL is16BitEquvalent(const char* orig, const char* equiv) { +static BOOL is16BitEquivalent(const char* orig, const char* equiv) { off_t i; for (i = 0;; i++) { @@ -860,7 +860,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); - if (is16BitEquvalent(specName, specWithOpSizeName)) { + if (is16BitEquivalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 6d3cd2411a..2c91c8c566 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -373,7 +373,7 @@ public: : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI); + return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); } }; @@ -384,7 +384,7 @@ public: : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI); + return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); } }; diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 5a42a80182..de80dd835e 100644 --- 
a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -20,7 +20,7 @@ using namespace llvm; namespace { class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: - X86ELFObjectWriter(bool is64Bit, uint8_t OSABI); + X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine); virtual ~X86ELFObjectWriter(); protected: @@ -30,10 +30,11 @@ namespace { }; } -X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI) - : MCELFObjectTargetWriter(Is64Bit, OSABI, - Is64Bit ? ELF::EM_X86_64 : ELF::EM_386, - /*HasRelocationAddend*/ Is64Bit) {} +X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, + uint16_t EMachine) + : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, + // Only i386 uses Rel instead of RelA. + /*HasRelocationAddend*/ EMachine != ELF::EM_386) {} X86ELFObjectWriter::~X86ELFObjectWriter() {} @@ -48,7 +49,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); unsigned Type; - if (is64Bit()) { + if (getEMachine() == ELF::EM_X86_64) { if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); @@ -130,7 +131,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_1: Type = ELF::R_X86_64_8; break; } } - } else { + } else if (getEMachine() == ELF::EM_386) { if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); @@ -210,15 +211,17 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_1: Type = ELF::R_386_8; break; } } - } + } else + llvm_unreachable("Unsupported ELF machine type."); return Type; } MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI) { + bool IsELF64, + uint8_t OSABI, + uint16_t EMachine) { MCELFObjectTargetWriter *MOTW = - new X86ELFObjectWriter(Is64Bit, OSABI); + new X86ELFObjectWriter(IsELF64, OSABI, EMachine); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 0b0989bff1..981aa1a2b9 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -89,8 +89,9 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, /// createX86ELFObjectWriter - Construct an X86 ELF object writer. MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI); + bool IsELF64, + uint8_t OSABI, + uint16_t EMachine); /// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. 
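The writer change above replaces an is64Bit flag with an explicit ELF machine type, because the property it actually gates, REL versus RELA relocations, follows the machine rather than the pointer width: of the two machines handled here, only EM_386 uses REL (implicit addends). The essence, with the ELF constants written out:

#include <cstdint>

constexpr uint16_t EM_386 = 3;     // ELF e_machine for i386
constexpr uint16_t EM_X86_64 = 62; // ELF e_machine for x86-64

// i386 stores addends in the relocated field (REL); x86-64 carries them in
// the relocation entry itself (RELA).
bool hasRelocationAddend(uint16_t eMachine) {
  return eMachine != EM_386;
}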
MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); } // End llvm namespace diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index e7aced8b52..9a63060c90 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -721,7 +721,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0), 0); + TD->getPointerSize(), 0); } Stubs.clear(); } diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp deleted file mode 100644 index 2e08ef811f..0000000000 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ /dev/null @@ -1,147 +0,0 @@ -//===-- X86ELFWriterInfo.cpp - ELF Writer Info for the X86 backend --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the X86 backend. -// -//===----------------------------------------------------------------------===// - -#include "X86ELFWriterInfo.h" -#include "X86Relocations.h" -#include "llvm/Function.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Implementation of the X86ELFWriterInfo class -//===----------------------------------------------------------------------===// - -X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_) - : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { - EMachine = is64Bit ? 
EM_X86_64 : EM_386; - } - -X86ELFWriterInfo::~X86ELFWriterInfo() {} - -unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { - if (is64Bit) { - switch(MachineRelTy) { - case X86::reloc_pcrel_word: - return ELF::R_X86_64_PC32; - case X86::reloc_absolute_word: - return ELF::R_X86_64_32; - case X86::reloc_absolute_word_sext: - return ELF::R_X86_64_32S; - case X86::reloc_absolute_dword: - return ELF::R_X86_64_64; - case X86::reloc_picrel_word: - default: - llvm_unreachable("unknown x86_64 machine relocation type"); - } - } else { - switch(MachineRelTy) { - case X86::reloc_pcrel_word: - return ELF::R_386_PC32; - case X86::reloc_absolute_word: - return ELF::R_386_32; - case X86::reloc_absolute_word_sext: - case X86::reloc_absolute_dword: - case X86::reloc_picrel_word: - default: - llvm_unreachable("unknown x86 machine relocation type"); - } - } -} - -long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: return Modifier - 4; - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - case ELF::R_X86_64_64: - return Modifier; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: return Modifier - 4; - case ELF::R_386_32: return Modifier; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - return 32; - case ELF::R_X86_64_64: - return 64; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: - case ELF::R_386_32: - return 32; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - if (is64Bit) { - switch(RelTy) { - case ELF::R_X86_64_PC32: - return true; - case ELF::R_X86_64_32: - case ELF::R_X86_64_32S: - case ELF::R_X86_64_64: - return false; - default: - llvm_unreachable("unknown x86_64 relocation type"); - } - } else { - switch(RelTy) { - case ELF::R_386_PC32: - return true; - case ELF::R_386_32: - return false; - default: - llvm_unreachable("unknown x86 relocation type"); - } - } -} - -unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - return is64Bit ? - X86::reloc_absolute_dword : X86::reloc_absolute_word; -} - -long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, - unsigned RelOffset, - unsigned RelTy) const { - - if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); -} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h deleted file mode 100644 index a45b5bb66a..0000000000 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- X86ELFWriterInfo.h - ELF Writer Info for X86 ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF writer information for the X86 backend. 
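For reference, the computeRelocation method in the file deleted above encodes the usual 32-bit PC-relative fixup: the stored value is the symbol address minus the end of the 4-byte field being patched. As a standalone expression:

#include <cstdint>

// Value stored into a 32-bit PC-relative field at 'relOffset' that should
// resolve to 'symOffset' (R_386_PC32 / R_X86_64_PC32 style).
int64_t pcrel32(uint64_t symOffset, uint64_t relOffset) {
  return (int64_t)symOffset - (int64_t)(relOffset + 4);
}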
-// -//===----------------------------------------------------------------------===// - -#ifndef X86_ELF_WRITER_INFO_H -#define X86_ELF_WRITER_INFO_H - -#include "llvm/Target/TargetELFWriterInfo.h" - -namespace llvm { - - class X86ELFWriterInfo : public TargetELFWriterInfo { - - public: - X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_); - virtual ~X86ELFWriterInfo(); - - /// getRelocationType - Returns the target specific ELF Relocation type. - /// 'MachineRelTy' contains the object code independent relocation type - virtual unsigned getRelocationType(unsigned MachineRelTy) const; - - /// hasRelocationAddend - True if the target uses an addend in the - /// ELF relocation entry. - virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } - - /// getDefaultAddendForRelTy - Gets the default addend value for a - /// relocation entry based on the target ELF relocation type. - virtual long int getDefaultAddendForRelTy(unsigned RelTy, - long int Modifier = 0) const; - - /// getRelTySize - Returns the size of relocatable field in bits - virtual unsigned getRelocationTySize(unsigned RelTy) const; - - /// isPCRelativeRel - True if the relocation type is pc relative - virtual bool isPCRelativeRel(unsigned RelTy) const; - - /// getJumpTableRelocationTy - Returns the machine relocation type used - /// to reference a jumptable. - virtual unsigned getAbsoluteLabelMachineRelTy() const; - - /// computeRelocation - Some relocatable fields could be relocated - /// directly, avoiding the relocation symbol emission, compute the - /// final relocation value for this symbol. - virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, - unsigned RelTy) const; - }; - -} // end llvm namespace - -#endif // X86_ELF_WRITER_INFO_H diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index a5285d4dd8..ad652366ad 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -45,9 +45,9 @@ class X86FastISel : public FastISel { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// StackPtr - Register used as the stack pointer. + /// RegInfo - X86 register info. /// - unsigned StackPtr; + const X86RegisterInfo *RegInfo; /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. @@ -61,9 +61,9 @@ public: const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); - StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); + RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo()); } virtual bool TargetSelectInstruction(const Instruction *I); @@ -282,9 +282,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. - if (isa<ConstantPointerNull>(Val)) { - Val = Constant::getNullValue(TD.getIntPtrType(Val->getType())); - } + if (isa<ConstantPointerNull>(Val)) + Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { @@ -916,9 +915,8 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, if (Op0Reg == 0) return false; // Handle 'null' like i32/i64 0. 
- if (isa<ConstantPointerNull>(Op1)) { - Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getType())); - } + if (isa<ConstantPointerNull>(Op1)) + Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use @@ -1808,7 +1806,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { } else { unsigned LocMemOffset = VA.getLocMemOffset(); X86AddressMode AM; - AM.Base.Reg = StackPtr; + AM.Base.Reg = RegInfo->getStackRegister(); AM.Disp = LocMemOffset; const Value *ArgVal = ArgVals[VA.getValNo()]; ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()]; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d3c5f0347..b0fb2f9f68 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -188,7 +188,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::ILP); else setSchedulingPreference(Sched::RegPressure); - setStackPointerRegisterToSaveRestore(X86StackPtr); + setStackPointerRegisterToSaveRestore(X86StackPtr); // @LOCALMOD // Bypass i32 with i8 on Atom when compiling with O2 if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) @@ -570,7 +570,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { + if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -2296,14 +2296,15 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, int FPDiff = 0; if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. - unsigned NumBytesCallerPushed = - MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); + X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); + unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); + FPDiff = NumBytesCallerPushed - NumBytes; // Set the delta of movement of the returnaddr stackslot. // But only set if delta is greater than previous delta. - if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta())) - MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); + if (FPDiff < X86Info->getTCReturnAddrDelta()) + X86Info->setTCReturnAddrDelta(FPDiff); } if (!IsSibcall) @@ -2380,7 +2381,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD + getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } @@ -2468,7 +2470,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy relative to framepointer. 
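In the LowerCall hunk above, the refactoring also makes the tail-call bookkeeping easier to follow: FPDiff is the difference between the bytes the caller will pop and the bytes this call needs, and the saved return-address delta is only ever allowed to grow more negative. A small model of that update, with a hypothetical FrameInfo in place of X86MachineFunctionInfo:

#include <algorithm>

struct FrameInfo {
  int bytesToPopOnReturn = 0; // what the caller's ret will pop
  int tcReturnAddrDelta = 0;  // most negative displacement seen so far
};

int computeFPDiff(FrameInfo &info, int numBytes) {
  int fpDiff = info.bytesToPopOnReturn - numBytes;
  // Matches "if (FPDiff < getTCReturnAddrDelta()) set...": keep the minimum.
  info.tcReturnAddrDelta = std::min(info.tcReturnAddrDelta, fpDiff);
  return fpDiff;
}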
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, + StackPtr = DAG.getCopyFromReg(Chain, dl, + X86StackPtr, // @LOCALMOD getPointerTy()); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); @@ -4665,7 +4668,6 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, MVT ShufVT = V.getValueType().getSimpleVT(); unsigned NumElems = ShufVT.getVectorNumElements(); SmallVector<int, 16> ShuffleMask; - SDValue ImmN; bool IsUnary; if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) @@ -6469,17 +6471,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } static bool MayFoldVectorLoad(SDValue V) { - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + while (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); + if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR && V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF) // BUILD_VECTOR (load), undef V = V.getOperand(0); - if (MayFoldLoad(V)) - return true; - return false; + + return MayFoldLoad(V); } // FIXME: the version above should always be used. Since there's @@ -6629,8 +6631,8 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { // Find the expansion ratio, e.g. expanding from i8 to i32 has a ratio of 4. unsigned Shift = 1; // Start from 2, i.e. 1 << 1. - while ((1 << Shift) < NumElems) { - if (SVOp->getMaskElt(1 << Shift) == 1) + while ((1U << Shift) < NumElems) { + if (SVOp->getMaskElt(1U << Shift) == 1) break; Shift += 1; // The maximal ratio is 8, i.e. from i8 to i64. @@ -7911,7 +7913,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), false, false, false, 0); - SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)), + SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), getPointerTy()); IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); @@ -9881,7 +9883,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD + SPTy).getValue(1); SDValue Ops1[2] = { Chain.getValue(0), Chain }; return DAG.getMergeValues(Ops1, 2, dl); @@ -14585,6 +14588,14 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return NewOp; SDValue InputVector = N->getOperand(0); + // Detect whether we are trying to convert from mmx to i32 and the bitcast + // from mmx to v2i32 has a single usage. + if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST && + InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx && + InputVector.hasOneUse() && N->getValueType(0) == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(), + N->getValueType(0), + InputVector.getNode()->getOperand(0)); // Only operate on vectors of 4 elements, where the alternative shuffling // gets to be more expensive. @@ -16658,6 +16669,16 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Helper function of PerformSETCCCombine. 
It is to materialize "setb reg" +// as "sbb reg,reg", since it can be extended without zext and produces +// an all-ones bit which is more useful than 0/1 in some cases. +static SDValue MaterializeSETB(DebugLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { + return DAG.getNode(ISD::AND, DL, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS), + DAG.getConstant(1, MVT::i8)); +} + // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -16666,14 +16687,29 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); SDValue EFLAGS = N->getOperand(1); + if (CC == X86::COND_A) { + // Try to convert COND_A into COND_B in an attempt to facilitate + // materializing "setb reg". + // + // Do not flip "e > c", where "c" is a constant, because Cmp instruction + // cannot take an immediate as its first operand. + // + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + SDValue NewSub = DAG.getNode(X86ISD::SUB, EFLAGS.getDebugLoc(), + EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return MaterializeSETB(DL, NewEFLAGS, DAG); + } + } + // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. if (CC == X86::COND_B) - return DAG.getNode(ISD::AND, DL, MVT::i8, - DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(CC, MVT::i8), EFLAGS), - DAG.getConstant(1, MVT::i8)); + return MaterializeSETB(DL, EFLAGS, DAG); SDValue Flags; @@ -17661,3 +17697,72 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } + +unsigned +X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, + Type *Ty) const { + const X86Subtarget &ST = + TLI->getTargetMachine().getSubtarget<X86Subtarget>(); + + // Fix some of the inaccuracies of the target independent estimation. + if (Ty->isVectorTy() && ST.hasSSE41()) { + unsigned NumElem = Ty->getVectorNumElements(); + unsigned SizeInBits = Ty->getScalarType()->getScalarSizeInBits(); + + bool Is2 = (NumElem == 2); + bool Is4 = (NumElem == 4); + bool Is8 = (NumElem == 8); + bool Is32bits = (SizeInBits == 32); + bool Is64bits = (SizeInBits == 64); + bool HasAvx = ST.hasAVX(); + bool HasAvx2 = ST.hasAVX2(); + + switch (Opcode) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: { + // Only AVX2 has support for 8-wide integer operations. + if (Is32bits && (Is4 || (Is8 && HasAvx2))) return 1; + if (Is64bits && (Is2 || (Is4 && HasAvx2))) return 1; + + // We don't have to completly scalarize unsupported ops. We can + // issue two half-sized operations (with some overhead). + // We don't need to extract the lower part of the YMM to the XMM. + // Extract the upper, two ops, insert the upper = 4. + if (Is32bits && Is8 && HasAvx) return 4; + if (Is64bits && Is4 && HasAvx) return 4; + break; + } + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: { + // AVX has support for 8-wide float operations. 
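The cost overrides above boil down to a width table: vector integer add/sub/mul is a single operation up to 128 bits on any SSE4.1-capable chip, a single operation at 256 bits only with AVX2, and on plain AVX a 256-bit integer op is priced at 4 (extract the upper half, two 128-bit ops, insert the upper half). A compressed restatement of that rule; anything it does not cover defers to the generic estimate, here signaled by 0:

// Returns the modeled cost of a vector integer add/sub/mul, or 0 to mean
// "defer to the generic cost model" (illustrative encoding, not LLVM's).
unsigned x86IntArithCost(unsigned numElts, unsigned eltBits,
                         bool hasAvx, bool hasAvx2) {
  unsigned totalBits = numElts * eltBits;
  if (totalBits <= 128) return 1;            // legal on SSE4.1 and later
  if (totalBits == 256 && hasAvx2) return 1; // AVX2 adds 256-bit integer ops
  if (totalBits == 256 && hasAvx)  return 4; // split: extract + 2 ops + insert
  return 0;                                  // not modeled here
}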
+ if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1; + if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1; + break; + } + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // AVX has support for 8-wide integer bitwise operations. + if (Is32bits && (Is4 || (Is8 && HasAvx))) return 1; + if (Is64bits && (Is2 || (Is4 && HasAvx))) return 1; + break; + } + } + } + + return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned +X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + // Floating point scalars are already located in index #0. + if (Val->getScalarType()->isFloatingPointTy() && Index == 0) + return 0; + return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8d8f3f5161..9c4fc95b56 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetTransformImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -142,6 +143,10 @@ namespace llvm { /// mnemonic, so do I; blame Intel. MOVDQ2Q, + /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX + /// vector to a GPR. + MMX_MOVD2W, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -488,10 +493,6 @@ namespace llvm { getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - /// getStackPtrReg - Return the stack pointer register we are using: either - /// ESP or RSP. - unsigned getStackPtrReg() const { return X86StackPtr; } - /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at @@ -725,7 +726,7 @@ namespace llvm { const X86Subtarget *Subtarget; const X86RegisterInfo *RegInfo; const DataLayout *TD; - + // @LOCALMOD - This is essentially a revert of r167104 /// X86StackPtr - X86 physical register used as stack ptr. 
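MaterializeSETB above is named for what the pattern ultimately becomes: SBB of a register with itself copies the carry flag into every bit, giving 0 or -1, and the AND with 1 narrows that to the 0/1 a setcc produces, while the all-ones intermediate is free for consumers that want a sign-extended mask. A value-level equivalent:

#include <cstdint>

int32_t sbbSameReg(bool carry) { return carry ? -1 : 0; } // r = r - r - CF
int32_t setb(bool carry) { return sbbSameReg(carry) & 1; } // 0 or 1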
unsigned X86StackPtr; @@ -964,6 +965,18 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } + + class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { + public: + explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : + VectorTargetTransformImpl(TL) {} + + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + }; + } #endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5628c3eada..0267fdd860 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1127,22 +1127,26 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // FIXME: add AVX 256-bit foldable instructions // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_16 }, - { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_16 }, + { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 }, { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_16 }, { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_16 }, { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_32 }, { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_32 }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 }, { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_16 }, { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_16 }, { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_32 }, { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_32 }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_16 }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_16 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 }, { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_16 }, { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_16 }, { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_32 }, { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_32 }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 }, { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_16 }, { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_16 }, { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_32 }, @@ -1288,22 +1292,26 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_16 }, - { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_16 }, + { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 }, + { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 }, { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_16 }, { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_16 }, { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_32 }, { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_32 }, + { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 }, + { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 }, { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_16 }, { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_16 }, { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_32 }, { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_32 }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_16 }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_16 }, + { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 }, + { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 }, { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_16 }, { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_16 }, { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, 
TB_ALIGN_32 }, { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_32 }, + { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 }, + { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 }, { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_16 }, { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_16 }, { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_32 }, diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index bd5485840d..127af6f7f9 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -207,8 +207,14 @@ def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>; -def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>; + +// Low word of MMX to GPR. +def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, + [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>; +def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e9c7f3e7f1..dff2d4ea1c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3293,17 +3293,52 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. +multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, + Intrinsic F32Int, OpndItins itins> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rr>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rm>; + } +} + // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. 
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, - SSE_SQRTS>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, + SSE_SQRTS>, sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, SSE_SQRTS>; -defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, - SSE_RCPS>, +let Predicates = [UseSSE1] in { + def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), + (RSQRTSSr_Int VR128:$src, VR128:$src)>; +} + +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, + SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>; +let Predicates = [UseSSE1] in { + def : Pat<(int_x86_sse_rcp_ss VR128:$src), + (RCPSSr_Int VR128:$src, VR128:$src)>; +} // There is no f64 version of the reciprocal approximation instructions. @@ -5850,6 +5885,21 @@ let Predicates = [HasAVX2] in { def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))), + (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; } let Predicates = [HasAVX] in { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 6e53e7ac93..a102935b4b 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -62,8 +62,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { EVT IntPtr = TLI.getPointerTy(); - unsigned AS = DstPtrInfo.getAddrSpace(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index e31bedf6de..59c037f296 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -85,7 +85,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), - ELFWriterInfo(is64Bit, true), InstrItins(Subtarget.getInstrItineraryData()){ // Determine the PICStyle based on the target selected. 
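For context on the X86SelectionDAGInfo hunk above: a memset whose stored value is constant zero is lowered as a call to the subtarget's bzero entry when one exists, and the change rebuilds the size argument with the default intptr type instead of a per-address-space one. A hypothetical C-level picture of the fold:

#include <stddef.h>
#include <strings.h>

// memset(p, 0, n) becomes bzero(p, n) when getBZeroEntry() names a
// symbol (e.g. on Darwin); n is passed as an intptr-sized integer.
void zero_buffer(void *p, size_t n) {
  bzero(p, n);
}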
if (getRelocationModel() == Reloc::Static) { diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index def028f191..967ce95d10 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -15,7 +15,6 @@ #define X86TARGETMACHINE_H #include "X86.h" -#include "X86ELFWriterInfo.h" #include "X86InstrInfo.h" #include "X86ISelLowering.h" #include "X86FrameLowering.h" @@ -37,7 +36,6 @@ class StringRef; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; X86FrameLowering FrameLowering; - X86ELFWriterInfo ELFWriterInfo; InstrItineraryData InstrItins; public: @@ -66,9 +64,6 @@ public: virtual const X86RegisterInfo *getRegisterInfo() const { return &getInstrInfo()->getRegisterInfo(); } - virtual const X86ELFWriterInfo *getELFWriterInfo() const { - return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; - } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } @@ -94,7 +89,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; #endif ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; + X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -131,7 +126,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86TargetLowering TLInfo; X86JITInfo JITInfo; ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; + X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index eaa745ba9b..9e7816e21f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -477,8 +477,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - unsigned AS = LD->getAddressSpace(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -537,8 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). - unsigned AS = ST->getAddressSpace(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext(), AS); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index b2748f2e6c..05aefeff9f 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -51,9 +51,9 @@ namespace { // Visit the GlobalVariables. 
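The XCore hunks above lower a misaligned load or store to a libcall, likewise now passing the default intptr type. A hypothetical C model of the load helper (the real __misaligned_load lives in the XCore runtime; little-endian byte order and a 32-bit unsigned are assumptions here):

// Assemble an i32 from four single-byte loads, which never fault on
// alignment, in place of one potentially misaligned word load.
unsigned misaligned_load_model(const unsigned char *p) {
  return (unsigned)p[0]
       | ((unsigned)p[1] << 8)
       | ((unsigned)p[2] << 16)
       | ((unsigned)p[3] << 24);
}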
for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { - I->setInitializer(0); - } else { + bool Delete = + deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; if (I->getName() == "llvm.global_ctors") @@ -69,16 +69,22 @@ namespace { // @LOCALMOD-END } - if (I->hasLocalLinkage()) + bool Local = I->hasLocalLinkage(); + if (Local) I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); + + if (Local || Delete) + I->setLinkage(GlobalValue::ExternalLinkage); + + if (Delete) + I->setInitializer(0); } // Visit the Functions. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { - I->deleteBody(); - } else { + bool Delete = + deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; // @LOCALMOD-BEGIN - this is likely upstreamable @@ -90,9 +96,46 @@ namespace { // @LOCALMOD-END } - if (I->hasLocalLinkage()) + bool Local = I->hasLocalLinkage(); + if (Local) I->setVisibility(GlobalValue::HiddenVisibility); - I->setLinkage(GlobalValue::ExternalLinkage); + + if (Local || Delete) + I->setLinkage(GlobalValue::ExternalLinkage); + + if (Delete) + I->deleteBody(); + } + + // Visit the Aliases. + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E;) { + Module::alias_iterator CurI = I; + ++I; + + if (CurI->hasLocalLinkage()) { + CurI->setVisibility(GlobalValue::HiddenVisibility); + CurI->setLinkage(GlobalValue::ExternalLinkage); + } + + if (deleteStuff == (bool)Named.count(CurI)) { + Type *Ty = CurI->getType()->getElementType(); + + CurI->removeFromParent(); + llvm::Value *Declaration; + if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) { + Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, + CurI->getName(), &M); + + } else { + Declaration = + new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, + 0, CurI->getName()); + + } + CurI->replaceAllUsesWith(Declaration); + delete CurI; + } } return true; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index d8f374c330..18409f77b3 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -28,9 +28,9 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/UniqueVector.h" #include "llvm/Support/InstIterator.h" using namespace llvm; @@ -486,13 +486,13 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { /// or a pointer that doesn't alias any other pointer visible to the caller. bool FunctionAttrs::IsFunctionMallocLike(Function *F, SmallPtrSet<Function*, 8> &SCCNodes) const { - UniqueVector<Value *> FlowsToReturn; + SmallSetVector<Value *, 8> FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) FlowsToReturn.insert(Ret->getReturnValue()); for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { - Value *RetVal = FlowsToReturn[i+1]; // UniqueVector[0] is reserved. 
+ Value *RetVal = FlowsToReturn[i]; if (Constant *C = dyn_cast<Constant>(RetVal)) { if (!C->isNullValue() && !isa<UndefValue>(C)) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 3d5657fe6a..678189b3d6 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1500,7 +1500,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, unsigned TypeSize = TD->getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast<StructType>(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = TD->getIntPtrType(GV->getType()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, 0, @@ -1730,7 +1730,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = TD->getIntPtrType(GV->getType()); + Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 86c76f0c0a..5d563d8bbf 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -95,7 +95,10 @@ void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) { } void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { - unwrap(PM)->add(createInternalizePass(AllButMain != 0)); + std::vector<const char *> Export; + if (AllButMain) + Export.push_back("main"); + unwrap(PM)->add(createInternalizePass(Export)); } void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index fb5869ede2..aa629cc0c6 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // -// This pass loops over all of the functions in the input module, looking for a -// main function. If a main function is found, all other functions and all -// global variables with initializers are marked as internal. +// This pass loops over all of the functions and variables in the input module. +// If the function or variable is not in the list of external names given to +// the pass, it is marked as internal.
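A minimal sketch of the interface change described above, assuming 3.2-era headers: callers of the old boolean form now pass an explicit export list, as the IPO.cpp hunk does for "main".

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
#include <vector>

// Internalize everything except "main" -- the pattern both the C
// bindings and the LTO pipeline use after this change.
void addInternalize(llvm::PassManagerBase &PM) {
  std::vector<const char *> Export;
  Export.push_back("main");
  PM.add(llvm::createInternalizePass(Export));
}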
// //===----------------------------------------------------------------------===// @@ -45,12 +45,9 @@ APIList("internalize-public-api-list", cl::value_desc("list"), namespace { class InternalizePass : public ModulePass { std::set<std::string> ExternalNames; - /// If no api symbols were specified and a main function is defined, - /// assume the main function is the only API - bool AllButMain; public: static char ID; // Pass identification, replacement for typeid - explicit InternalizePass(bool AllButMain = true); + explicit InternalizePass(); explicit InternalizePass(const std::vector <const char *>& exportList); void LoadFile(const char *Filename); virtual bool runOnModule(Module &M); @@ -66,8 +63,8 @@ char InternalizePass::ID = 0; INITIALIZE_PASS(InternalizePass, "internalize", "Internalize Global Symbols", false, false) -InternalizePass::InternalizePass(bool AllButMain) - : ModulePass(ID), AllButMain(AllButMain){ +InternalizePass::InternalizePass() + : ModulePass(ID) { initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); @@ -76,7 +73,7 @@ InternalizePass::InternalizePass(bool AllButMain) } InternalizePass::InternalizePass(const std::vector<const char *>&exportList) - : ModulePass(ID), AllButMain(false){ + : ModulePass(ID){ initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(std::vector<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { @@ -103,23 +100,6 @@ void InternalizePass::LoadFile(const char *Filename) { bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable<CallGraph>(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; - - if (ExternalNames.empty()) { - // Return if we're not in 'all but main' mode and have no external api - if (!AllButMain) - return false; - // If no list or file of symbols was specified, check to see if there is a - // "main" symbol defined in the module. If so, use it, otherwise do not - // internalize the module, it must be a library or something. - // - Function *MainFunc = M.getFunction("main"); - if (MainFunc == 0 || MainFunc->isDeclaration()) - return false; // No main found, must be a library... - - // Preserve main, internalize all else. - ExternalNames.insert(MainFunc->getName()); - } - bool Changed = false; // Never internalize functions which code-gen might insert. 
@@ -189,8 +169,8 @@ bool InternalizePass::runOnModule(Module &M) { return Changed; } -ModulePass *llvm::createInternalizePass(bool AllButMain) { - return new InternalizePass(AllButMain); +ModulePass *llvm::createInternalizePass() { + return new InternalizePass(); } ModulePass *llvm::createInternalizePass(const std::vector <const char *> &el) { diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 1c6477c022..44283ddce7 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -206,8 +206,9 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { if (TD) { - if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1)) return true; - if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2)) return true; + LLVMContext &Ctx = Ty1->getContext(); + if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; + if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; } return false; } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 1d8f1e531a..05253fcdda 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -33,7 +33,11 @@ using namespace llvm; static cl::opt<bool> -RunVectorization("vectorize", cl::desc("Run vectorization passes")); +RunLoopVectorization("vectorize-loops", + cl::desc("Run the Loop vectorization passes")); + +static cl::opt<bool> +RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes")); static cl::opt<bool> UseGVNAfterVectorization("use-gvn-after-vectorization", @@ -52,7 +56,8 @@ PassManagerBuilder::PassManagerBuilder() { DisableSimplifyLibCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; - Vectorize = RunVectorization; + Vectorize = RunBBVectorization; + LoopVectorize = RunLoopVectorization; } PassManagerBuilder::~PassManagerBuilder() { @@ -185,7 +190,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createLoopDeletionPass()); // Delete dead loops - if (Vectorize) { + if (LoopVectorize) { MPM.add(createLoopVectorizePass()); MPM.add(createLICMPass()); } @@ -245,8 +250,11 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // Now that composite has been compiled, scan through the module, looking // for a main function. If main is defined, mark all other functions // internal. - if (Internalize) - PM.add(createInternalizePass(true)); + if (Internalize) { + std::vector<const char*> E; + E.push_back("main"); + PM.add(createInternalizePass(E)); + } // Propagate constants at call sites into the functions they call. 
This // opens opportunities for globalopt (and inlining) by substituting function diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 0e765f7aaa..7467eca7ab 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -208,7 +208,7 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy, + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0958842d08..4f4c388a92 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -996,9 +996,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. !((OldRetTy->isPointerTy() || !TD || - OldRetTy == TD->getIntPtrType(NewRetTy)) && + OldRetTy == TD->getIntPtrType(Caller->getContext())) && (NewRetTy->isPointerTy() || !TD || - NewRetTy == TD->getIntPtrType(OldRetTy)))) + NewRetTy == TD->getIntPtrType(Caller->getContext())))) return false; // Cannot transform this return value. if (!Caller->use_empty() && @@ -1057,13 +1057,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. - // FIXME: Not sure what to do here, so setting AS to 0. - // How can the AS for a function call be outside the default? bool isConvertible = ActTy == ParamTy || (TD && ((ParamTy->isPointerTy() || - ParamTy == TD->getIntPtrType(ActTy)) && + ParamTy == TD->getIntPtrType(Caller->getContext())) && (ActTy->isPointerTy() || - ActTy == TD->getIntPtrType(ParamTy)))); + ActTy == TD->getIntPtrType(Caller->getContext())))); if (Callee->isDeclaration() && !isConvertible) return false; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 119d2f5c99..bb59db8e7b 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Scale = 0; return ConstantInt::get(Val->getType(), 0); } - + if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { // Cannot look past anything that might overflow. OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); @@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Mul) { // This value is scaled by 'RHS'. Scale = RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, + // We have X+C. Check to see if we really have (X*C2)+C1, // where C1 is divisible by C2. 
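A worked instance of the case this comment introduces (hypothetical values):

  Val = add (mul %X, 4), 8
    inner call  -> base %X, SubScale = 4
    this level  -> Offset += 8, Scale = 4

so Val decomposes as %X*4 + 8; the caller (PromoteCastOfAllocation below) then checks the divisibility before rescaling the allocation.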
unsigned SubScale; - Value *SubVal = + Value *SubVal = DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; @@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (!TD) return 0; PointerType *PTy = cast<PointerType>(CI.getType()); - + BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); @@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - + // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || @@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Insert before the alloca, not before the cast. Amt = AllocaBuilder.CreateMul(Amt, NumElements); } - + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off); } - + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); - + // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. @@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that +/// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; - } + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - + // Otherwise, must be the same type of cast, so just reinsert a new one. // This also handles the case of zext(trunc(x)) -> zext(x). Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, @@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Res = NPN; break; } - default: + default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); } - + Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. 
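An example of the cast pairs this wrapper vets, with hypothetical types: on a target whose intptr type is i64,

  %i = ptrtoint i8* %p to i64
  %q = inttoptr i64 %i to i8*

folds away entirely (%q is just %p), while the same round trip through i32 must be kept, since the truncation can change the value. The SrcIntPtrTy/MidIntPtrTy/DstIntPtrTy arguments introduced below feed exactly this determination.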
-static Instruction::CastOps +static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction @@ -238,18 +238,22 @@ isEliminableCastPair( // Get the opcodes of the two Cast instructions Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); + Type *SrcIntPtrTy = TD && SrcTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(SrcTy) : 0; + Type *MidIntPtrTy = TD && MidTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(MidTy) : 0; + Type *DstIntPtrTy = TD && DstTy->isPtrOrPtrVectorTy() ? + TD->getIntPtrType(DstTy) : 0; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, - TD ? TD->getIntPtrType(DstTy) : 0); + DstTy, SrcIntPtrTy, MidIntPtrTy, + DstIntPtrTy); // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && - (!TD || SrcTy != TD->getIntPtrType(DstTy))) || - (Res == Instruction::PtrToInt && - (!TD || DstTy != TD->getIntPtrType(SrcTy)))) + if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || + (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; - + return Instruction::CastOps(Res); } @@ -261,18 +265,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - + // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) if (isEliminableCastPair(CI, opc, Ty, TD)) return false; - + // If this is a vector sext from a compare, then we don't want to break the // idiom where each element of the extended vector is either zero or all ones. if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) return false; - + return true; } @@ -284,7 +288,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast - if (Instruction::CastOps opc = + if (Instruction::CastOps opc = isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. @@ -307,7 +311,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - + return 0; } @@ -326,15 +330,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + Type *OrigTy = V->getType(); - + // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. - if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && I->getOperand(0)->getType() == Ty) return true; @@ -419,29 +423,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // TODO: Can handle more cases here. 
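For a sense of which trees CanEvaluateTruncated accepts (sketch):

  %s = add i32 %x, %y
  %t = trunc i32 %s to i8

can be re-evaluated as an i8 add of the truncated operands, because truncation distributes over add; EvaluateInDifferentType then rebuilds the narrow tree and the trunc disappears.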
break; } - + return false; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); - + // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { - + // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -458,7 +462,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; if (Src->hasOneUse() && @@ -468,7 +472,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // ASize < MidSize and MidSize > ResultSize, but don't know the relation // between ASize and ResultSize. unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - + // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) @@ -481,7 +485,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } - + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && @@ -504,7 +508,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // cast to integer to avoid the comparison. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { const APInt &Op1CV = Op1C->getValue(); - + // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || @@ -534,14 +538,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); - + APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? 
if (!DoXform) return ICI; @@ -555,7 +559,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } - + uint32_t ShiftAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); if (ShiftAmt) { @@ -564,12 +568,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), In->getName()+".lobit"); } - + if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } - + if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); @@ -642,19 +646,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If the input is a truncate from the destination type, we can trivially // eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + unsigned Opc = I->getOpcode(), Tmp; switch (Opc) { case Instruction::ZExt: // zext(zext(x)) -> zext(x). @@ -674,7 +678,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) return true; - + // If the operation is an AND/OR/XOR and the bits to clear are zero in the // other side, BitsToClear is ok. if (Tmp == 0 && @@ -687,10 +691,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { APInt::getHighBitsSet(VSize, BitsToClear))) return true; } - + // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - + case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. @@ -712,7 +716,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Tmp != BitsToClear) return false; return true; - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -739,44 +743,44 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) return Result; - // See if we can simplify any instructions used by the input whose sole + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. 
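A sketch of the zero-extension analogue, showing BitsToClear at work:

  %t = trunc i32 %x to i8
  %o = or i8 %t, 1
  %z = zext i8 %o to i32

re-evaluates as or i32 %x, 1 followed by and i32 ..., 255; the and clears the high bits the i8 type used to drop, which is the bookkeeping that BitsToClear and the mask emitted below perform.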
unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); - + // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); - + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // If the high bits are already filled with zeros, just replace this // cast with the result. if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, DestBitSize-SrcBitsKept))) return ReplaceInstUsesWith(CI, Res); - + // We need to emit an AND to clear the high bits. Constant *C = ConstantInt::get(Res->getType(), APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); @@ -788,7 +792,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // 'and' which will be much cheaper than the pair of casts. if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast // TODO: Subsume this into EvaluateInDifferentType. - + // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); @@ -805,7 +809,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } - + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), @@ -814,7 +818,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize > DstSize) { Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), AndValue)); } @@ -872,7 +876,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - + return 0; } @@ -985,14 +989,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // If this is a constant, it can be trivially promoted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is a truncate from the dest type, we can trivially eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; @@ -1011,14 +1015,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // These operators can all arbitrarily be extended if their inputs can. return CanEvaluateSExtd(I->getOperand(0), Ty) && CanEvaluateSExtd(I->getOperand(1), Ty); - + //case Instruction::Shl: TODO //case Instruction::LShr: TODO - + case Instruction::Select: return CanEvaluateSExtd(I->getOperand(1), Ty) && CanEvaluateSExtd(I->getOperand(2), Ty); - + case Instruction::PHI: { // We can change a phi if we can change all operands. 
Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -1032,7 +1036,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } @@ -1041,15 +1045,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + if (Instruction *I = commonCastTransforms(CI)) return I; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); @@ -1072,7 +1076,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // cast with the result. if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), @@ -1085,7 +1089,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); @@ -1121,7 +1125,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } - + return 0; } @@ -1143,7 +1147,7 @@ static Value *LookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0)); - + // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. @@ -1162,14 +1166,14 @@ static Value *LookThroughFPExtensions(Value *V) { return V; // Don't try to shrink to various long double types. } - + return V; } Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing // the add as the smaller type. 
This applies to fadd/fsub/fmul/fdiv as well @@ -1186,7 +1190,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && + if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of @@ -1198,10 +1202,10 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } - break; + break; } } - + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && @@ -1216,7 +1220,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1224,15 +1228,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); - - + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); EraseInstFromFunction(*Call); return ret; } } - + return 0; } @@ -1250,7 +1254,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ @@ -1264,19 +1268,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); if (OpI == 0) return commonCastTransforms(FI); - + // fptosi(sitofp(X)) --> X // fptosi(uitofp(X)) --> X // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - + return commonCastTransforms(FI); } @@ -1292,22 +1296,21 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. 
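E.g., on a target with 32-bit pointers (sketch):

  %p = inttoptr i64 %v to i8*

canonicalizes to

  %t = trunc i64 %v to i32
  %p = inttoptr i32 %t to i8*

so the width change is visible to later folds. The hunk below reverts to the context-only getIntPtrType, dropping the per-address-space query.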
- unsigned AS = CI.getAddressSpace(); if (TD) { if (CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits(AS)) { + TD->getPointerSizeInBits()) { Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getType())); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } if (CI.getOperand(0)->getType()->getScalarSizeInBits() < - TD->getPointerSizeInBits(AS)) { + TD->getPointerSizeInBits()) { Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getType())); + TD->getIntPtrType(CI.getContext())); return new IntToPtrInst(P, CI.getType()); } } - + if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1317,19 +1320,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! if (GEP->hasAllZeroIndices()) { // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another + // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } - + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other @@ -1344,8 +1347,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; - Type *IntPtrTy = TD->getIntPtrType(OrigBase->getType()); - if (FindElementAtOffset(GEPIdxTy, Offset, IntPtrTy, NewIndices)) { + if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. @@ -1353,15 +1355,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Builder->CreateInBoundsGEP(OrigBase, NewIndices) : Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); - + if (isa<BitCastInst>(CI)) return new BitCastInst(NGEP, CI.getType()); assert(isa<PtrToIntInst>(CI)); return new PtrToIntInst(NGEP, CI.getType()); - } + } } } - + return commonCastTransforms(CI); } @@ -1369,20 +1371,19 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. 
- unsigned AS = CI.getPointerAddressSpace(); if (TD) { - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) { + if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext(), AS)); + TD->getIntPtrType(CI.getContext())); return new TruncInst(P, CI.getType()); } - if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) { + if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext(), AS)); + TD->getIntPtrType(CI.getContext())); return new ZExtInst(P, CI.getType()); } } - + return commonPointerCastTransforms(CI); } @@ -1397,33 +1398,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. VectorType *SrcTy = cast<VectorType>(InVal->getType()); - + if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the // same size. There is no specific reason we couldn't handle things like // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten - // there yet. + // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) return 0; - + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); } - + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - + if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) ShuffleMask.push_back(i); - + } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros @@ -1437,7 +1438,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) ShuffleMask.push_back(SrcElts); } - + return new ShuffleVectorInst(InVal, V2, ConstantDataVector::get(V2->getContext(), ShuffleMask)); @@ -1464,7 +1465,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; - + // If we got down to a value of the right type, we win, try inserting into the // right element. if (V->getType() == VecEltTy) { @@ -1472,15 +1473,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (Constant *C = dyn_cast<Constant>(V)) if (C->isNullValue()) return true; - + // Fail if multiple elements are inserted into this slot. if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) return false; - + Elements[ElementIndex] = V; return true; } - + if (Constant *C = dyn_cast<Constant>(V)) { // Figure out the # elements this provides, and bitcast it or slice it up // as required. 
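A worked slice for the constant case (hypothetical value): inserting the i64 constant 0x0000000200000001 into <2 x i32> element slots yields

  lshr by 0, trunc  -> i32 1   (slot 0)
  lshr by 32, trunc -> i32 2   (slot 1)

one element-sized piece per slot, exactly what the lshr/trunc loop below produces.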
@@ -1491,7 +1492,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), ElementIndex, Elements, VecEltTy); - + // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. if (!isa<IntegerType>(C->getType())) @@ -1499,7 +1500,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); - + for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), i*ElementSize)); @@ -1509,23 +1510,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, } return true; } - + if (!V->hasOneUse()) return false; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::Or: return CollectInsertionElements(I->getOperand(0), ElementIndex, Elements, VecEltTy) && @@ -1537,11 +1538,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (CI == 0) return false; if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, Elements, VecEltTy); } - + } } @@ -1576,11 +1577,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] == 0) continue; // Unset element. - + Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); } - + return Result; } @@ -1608,11 +1609,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); } } - + // bitcast(trunc(lshr(bitcast(somevector), cst)) ConstantInt *ShAmt = 0; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), @@ -1629,7 +1630,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + unsigned Elt = ShAmt->getZExtValue() / DestWidth; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1653,12 +1654,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { PointerType *SrcPTy = cast<PointerType>(SrcTy); Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - + // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. 
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1666,14 +1667,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; - + // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; - while (SrcElTy != DstElTy && + while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); @@ -1686,7 +1687,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - + // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) @@ -1699,7 +1700,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - + if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all @@ -1712,7 +1713,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { cast<VectorType>(DestTy), *this)) return I; } - + // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. @@ -1723,7 +1724,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = + Value *Elem = Builder->CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); @@ -1733,7 +1734,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1742,9 +1743,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. 
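A sketch of that case:

  %a = bitcast <4 x float> %x to <4 x i32>
  %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %r = bitcast <4 x i32> %s to <4 x float>

can instead shuffle %x against (bitcast %b to <4 x float>) with the same mask, erasing at least the outer bitcast.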
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); @@ -1754,7 +1755,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } } - + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 055c3b1514..8cb4a59cba 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -365,13 +365,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // order the state machines in complexity of the generated code. Value *Idx = GEP->getOperand(2); - unsigned AS = GEP->getPointerAddressSpace(); // If the index is larger than the pointer size of the target, truncate the // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds() && - Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS)) - Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext(), AS)); + Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) + Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); // If the comparison is only true for one or two elements, emit direct // comparisons. @@ -529,17 +528,16 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { } } - unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace(); // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(AS); + unsigned IntPtrWidth = TD.getPointerSizeInBits(); if (Offset == 0) { // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; @@ -561,7 +559,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext(), AS); + Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -1554,7 +1552,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. 
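E.g., on a 64-bit target (sketch):

  icmp eq (ptrtoint i8* %p to i64), 0

becomes

  icmp eq i8* %p, null

which is why the guard below compares the flat pointer width against DestTy's bit width, so the fold only fires when the integer really is pointer-sized.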
if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getTypeSizeInBits(DestTy) == + TD->getPointerSizeInBits() == cast<IntegerType>(DestTy)->getBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { @@ -2250,7 +2248,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(LHSI->getType()) == + TD->getIntPtrType(RHSC->getContext()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); @@ -2897,10 +2895,6 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (!RHSF) break; - // We can't convert a PPC double double. - if (RHSF->getType()->isPPC_FP128Ty()) - break; - const fltSemantics *Sem; // FIXME: This shouldn't be here. if (LHSExt->getSrcTy()->isHalfTy()) @@ -2913,6 +2907,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Sem = &APFloat::IEEEquad; else if (LHSExt->getSrcTy()->isX86_FP80Ty()) Sem = &APFloat::x87DoubleExtended; + else if (LHSExt->getSrcTy()->isPPC_FP128Ty()) + Sem = &APFloat::PPCDoubleDouble; else break; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 633ad93ad9..4ab5b6e4a0 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -173,7 +173,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (TD) { - Type *IntPtrTy = TD->getIntPtrType(AI.getType()); + Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -185,7 +185,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { - Type *NewTy = + Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for @@ -328,7 +328,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getDataLayout() && - (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. @@ -339,7 +339,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. 
- LoadInst *NewLoad = + LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); @@ -376,7 +376,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. if (!LI.isSimple()) return 0; - + // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. @@ -397,7 +397,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Constant::getNullValue(Op->getType()), &LI); return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } - } + } // load null/undef -> unreachable // TODO: Consider a target hook for valid address spaces for this xform. @@ -416,7 +416,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (CE->isCast()) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; - + if (Op->hasOneUse()) { // Change select and PHI nodes to select values instead of addresses: this // helps alias analysis out a lot, allows many others simplifications, and @@ -470,18 +470,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); if (SrcTy == 0) return 0; - + Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; - + /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) /// on 32-bit hosts. SmallVector<Value*, 4> NewGEPIndices; - + // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. @@ -489,7 +489,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); - + while (1) { if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { if (!STy->getNumElements()) /* Struct can be empty {} */ @@ -503,23 +503,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { break; } } - + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; - + // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. if (!IC.getDataLayout() || - SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace() || + SrcTy->getAddressSpace() != + cast<PointerType>(CI->getType())->getAddressSpace() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before + // the same size. Instead of casting the pointer before // the store, cast the value to be stored. 
Value *NewCast; Value *SIOp0 = SI.getOperand(0); @@ -533,12 +534,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } - + // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. if (!NewGEPIndices.empty()) CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"); SI.setOperand(0, NewCast); @@ -557,7 +558,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { static bool equivalentAddressValues(Value *A, Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come form identical arithmetic instructions. // This uses isIdenticalToWhenDefined instead of isIdenticalTo because // its only used to compare two uses within the same basic block, which @@ -570,7 +571,7 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } @@ -601,7 +602,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { - if (isa<AllocaInst>(Ptr)) + if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { @@ -624,8 +625,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; - } - + } + if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), @@ -637,7 +638,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } break; } - + // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). @@ -645,12 +646,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); - + // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } - + // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; @@ -680,11 +681,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; - + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || @@ -693,7 +694,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! 
- + return 0; } @@ -707,12 +708,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); - + // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); - + // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); @@ -724,7 +725,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { if (++PI == pred_end(DestBB)) return false; - + P = *PI; if (P != StoreBB) { if (OtherBB) @@ -744,7 +745,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; - + // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = 0; @@ -766,10 +767,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. - if (OtherBr->getSuccessor(0) != StoreBB && + if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; - + // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. @@ -787,7 +788,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BBI == OtherBB->begin()) return false; } - + // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. @@ -797,7 +798,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; } } - + // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { @@ -806,7 +807,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } - + // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); @@ -816,7 +817,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); - NewSI->setDebugLoc(OtherStore->getDebugLoc()); + NewSI->setDebugLoc(OtherStore->getDebugLoc()); // Nuke the old stores. EraseInstFromFunction(SI); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 00b7fca681..ccf75bca2b 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -738,7 +738,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. 
-Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; @@ -746,6 +746,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, Type *IntPtrTy // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] + Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1054,7 +1055,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // by multiples of a zero size type with zero. if (TD) { bool MadeChange = false; - Type *IntPtrTy = TD->getIntPtrType(PtrOp->getType()); + Type *IntPtrTy = TD->getIntPtrType(GEP.getPointerOperandType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); @@ -1073,7 +1074,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { + if (IndexTy != IntPtrTy) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -1239,7 +1240,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && "Index not cast to pointer width?"); bool NSW; @@ -1274,7 +1275,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && "Index not cast to pointer width?"); bool NSW; @@ -1336,8 +1337,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> NewIndices; Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - Type *IntPtrTy = TD->getIntPtrType(BCI->getOperand(0)->getType()); - if (FindElementAtOffset(InTy, Offset, IntPtrTy, NewIndices)) { + if (FindElementAtOffset(InTy, Offset, NewIndices)) { Value *NGEP = GEP.isInBounds() ? 
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : Builder->CreateGEP(BCI->getOperand(0), NewIndices); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4abaeca0c5..93f785ca5b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -215,6 +215,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; + SmallSet<GlobalValue*, 32> GlobalsCreatedByAsan; }; } // namespace @@ -508,6 +509,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { if (BL->isIn(*G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; + if (GlobalsCreatedByAsan.count(G)) return false; // Our own global. // Touch only those globals that will not be defined in other modules. // Don't handle ODR type linkages since other modules may be built w/o asan. if (G->getLinkage() != GlobalVariable::ExternalLinkage && @@ -704,7 +706,7 @@ bool AddressSanitizer::doInitialization(Module &M) { BL.reset(new BlackList(ClBlackListFile)); C = &(M.getContext()); - LongSize = TD->getPointerSizeInBits(0); + LongSize = TD->getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); IntptrPtrTy = PointerType::get(IntptrTy, 0); @@ -1090,9 +1092,10 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) { Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, LongSize/8)); BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); - Value *Description = IRB.CreatePointerCast( - createPrivateGlobalForString(*F.getParent(), StackDescription.str()), - IntptrTy); + GlobalVariable *StackDescriptionGlobal = + createPrivateGlobalForString(*F.getParent(), StackDescription.str()); + GlobalsCreatedByAsan.insert(StackDescriptionGlobal); + Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); // Poison the stack redzones at the entry. 
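A note on the AddressSanitizer hunks above: the pass now records every global it creates itself (the stack-description strings) in the new GlobalsCreatedByAsan set, and ShouldInstrumentGlobal bails out for anything in that set, so the pass never tries to redzone the very metadata it emits for error reporting. Below is a minimal standalone sketch of that record-and-skip pattern; the types and names are illustrative stand-ins, not the LLVM API:

    #include <set>
    #include <string>

    struct Global { std::string Name; };        // stand-in for llvm::GlobalVariable

    class Instrumenter {
      std::set<const Global *> CreatedByUs;     // analogous to GlobalsCreatedByAsan
    public:
      const Global *createDescription(const std::string &Text) {
        const Global *G = new Global{Text};
        CreatedByUs.insert(G);                  // remember globals we made ourselves
        return G;
      }
      bool shouldInstrument(const Global *G) const {
        return CreatedByUs.count(G) == 0;       // our own globals are never instrumented
      }
    };

The membership test runs before all other instrumentation checks, which keeps pass-generated metadata out of the instrumentation set without any naming convention or linkage trick.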
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index dd36a00070..7810b1b8a3 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -143,7 +143,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - IntegerType *IntTy = TD->getIntPtrType(Ptr->getType()); + Type *IntTy = TD->getIntPtrType(Ptr->getType()); Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); // three checks are required to ensure safety: diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 74e310f7e7..123ed0f4f3 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,7 +18,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/GlobalVariable.h" #include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -128,7 +127,6 @@ namespace { bool OptimizeSelectInst(SelectInst *SI); bool DupRetToEnableTailCallOpts(ReturnInst *RI); bool PlaceDbgValues(Function &F); - bool ConvertLoadToSwitch(LoadInst *LI); }; } @@ -935,7 +933,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); Type *IntPtrTy = - TLI->getDataLayout()->getIntPtrType(Addr->getType()); + TLI->getDataLayout()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; @@ -1292,11 +1290,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = false; if (TLI) - Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); - Changed |= ConvertLoadToSwitch(LI); - return Changed; + return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); + return false; } if (StoreInst *SI = dyn_cast<StoreInst>(I)) { @@ -1376,109 +1372,3 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { } return MadeChange; } - -static bool TargetSupportsJumpTables(const TargetLowering &TLI) { - return TLI.supportJumpTables() && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); -} - -/// ConvertLoadToSwitch - Convert loads from constant lookup tables into -/// switches. This undos the switch-to-lookup table transformation in -/// SimplifyCFG for targets where that is inprofitable. -bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) { - // This only applies to targets that don't support jump tables. - if (!TLI || TargetSupportsJumpTables(*TLI)) - return false; - - // FIXME: In the future, it would be desirable to have enough target - // information in SimplifyCFG, so we could decide at that stage whether to - // transform the switch to a lookup table or not, and this - // reverse-transformation could be removed. 
- - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand()); - if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace()) - return false; - if (GEP->getNumIndices() != 2) - return false; - Value *FirstIndex = GEP->idx_begin()[0]; - ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex); - if (!FirstIndexInt || !FirstIndexInt->isZero()) - return false; - - Value *TableIndex = GEP->idx_begin()[1]; - IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType()); - - GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()); - if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) - return false; - - Constant *Arr = GV->getInitializer(); - uint64_t NumElements; - if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr)) - NumElements = CA->getType()->getNumElements(); - else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr)) - NumElements = CDA->getNumElements(); - else - return false; - if (NumElements < 2) - return false; - - // Split the block. - BasicBlock *OriginalBB = LI->getParent(); - BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI); - - // Replace OriginalBB's terminator with a switch. - IRBuilder<> Builder(OriginalBB->getTerminator()); - SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB, - NumElements - 1); - OriginalBB->getTerminator()->eraseFromParent(); - - // Count the frequency of each value to decide which to use as default. - SmallDenseMap<Constant*, uint64_t> ValueFreq; - for (uint64_t I = 0; I < NumElements; ++I) - ++ValueFreq[Arr->getAggregateElement(I)]; - uint64_t MaxCount = 0; - Constant *DefaultValue = NULL; - for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(), - E = ValueFreq.end(); I != E; ++I) { - if (I->second > MaxCount) { - MaxCount = I->second; - DefaultValue = I->first; - } - } - assert(DefaultValue && "No values in the array?"); - - // Create the phi node in PostSwitchBB, which will replace the load. - Builder.SetInsertPoint(PostSwitchBB->begin()); - PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements); - PHI->addIncoming(DefaultValue, OriginalBB); - - // Build basic blocks to target with the switch. - for (uint64_t I = 0; I < NumElements; ++I) { - Constant *C = Arr->getAggregateElement(I); - if (C == DefaultValue) continue; // Already covered by the default case. - - BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(), - "lookup.bb", - PostSwitchBB->getParent(), - PostSwitchBB); - Switch->addCase(ConstantInt::get(TableIndexTy, I), BB); - Builder.SetInsertPoint(BB); - Builder.CreateBr(PostSwitchBB); - PHI->addIncoming(C, BB); - } - - // Remove the load. - LI->replaceAllUsesWith(PHI); - LI->eraseFromParent(); - - // Clean up. - if (GEP->use_empty()) - GEP->eraseFromParent(); - if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty()) - GV->eraseFromParent(); - - CurInstIterator = Switch; - return true; -} diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index b6e15540e7..f003e06699 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -746,16 +746,6 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, return true; } -/// Wrap TD.getIntPtrType, but return a vector type for vector inputs. 
-static Type *getIntPtrType(Type *Ty, const DataLayout &TD) { - Type *ITy = TD.getIntPtrType(Ty); - if (Ty->isVectorTy()) { - ITy = VectorType::get(ITy, Ty->getVectorNumElements()); - } - - return ITy; -} - /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and /// then a load from a must-aliased pointer of a different type, try to coerce /// the stored value. LoadedTy is the type of the load we want to replace and @@ -784,13 +774,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, // Convert source pointers to integers, which can be bitcast. if (StoredValTy->getScalarType()->isPointerTy()) { - StoredValTy = getIntPtrType(StoredValTy, TD); + StoredValTy = TD.getIntPtrType(StoredValTy); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->getScalarType()->isPointerTy()) - TypeToCastTo = getIntPtrType(StoredValTy, TD); + TypeToCastTo = TD.getIntPtrType(TypeToCastTo); if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); @@ -809,7 +799,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, // Convert source pointers to integers, which can be manipulated. if (StoredValTy->getScalarType()->isPointerTy()) { - StoredValTy = getIntPtrType(StoredValTy, TD); + StoredValTy = TD.getIntPtrType(StoredValTy); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } @@ -1031,7 +1021,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // to an integer type to start with. if (SrcVal->getType()->getScalarType()->isPointerTy()) SrcVal = Builder.CreatePtrToInt(SrcVal, - getIntPtrType(SrcVal->getType(), TD)); + TD.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 8a2f093629..310fd6147a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -220,8 +220,6 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, /// ConvertToSInt - Convert APF to an integer, if possible. static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { bool isExact = false; - if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) - return false; // See if we can convert this to an int64_t uint64_t UIntVal; if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, @@ -1430,8 +1428,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount, /// genLoopLimit - Help LinearFunctionTestReplace by generating a value that /// holds the RHS of the new loop test. static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, - SCEVExpander &Rewriter, ScalarEvolution *SE, - Type *IntPtrTy) { + SCEVExpander &Rewriter, ScalarEvolution *SE) { const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); const SCEV *IVInit = AR->getStart(); @@ -1457,8 +1454,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // We could handle pointer IVs other than i8*, but we need to compensate for // gep index scaling. See canExpandBackedgeTakenCount comments. 
assert(SE->getSizeOfExpr( - cast<PointerType>(GEPBase->getType())->getElementType(), - IntPtrTy)->isOne() + cast<PointerType>(GEPBase->getType())->getElementType())->isOne() && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); @@ -1557,9 +1553,7 @@ LinearFunctionTestReplace(Loop *L, CmpIndVar = IndVar; } - Type *IntPtrTy = TD ? TD->getIntPtrType(IndVar->getType()) : - IntegerType::getInt64Ty(IndVar->getContext()); - Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE, IntPtrTy); + Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() && "genLoopLimit missed a cast"); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e4b40f3d3a..a44e798f12 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -486,9 +486,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // would be unsafe to do if there is anything else in the loop that may read // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. - assert(DestPtr->getType()->isPointerTy() - && "Must be a pointer type."); - unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace(); + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); @@ -507,7 +505,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(DestPtr->getType()); + Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), @@ -613,7 +611,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(SI->getType()); + Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 97fff9edd6..517657cf52 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -174,11 +174,10 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // this width can be stored. If so, check to see whether we will end up // actually reducing the number of stores used. unsigned Bytes = unsigned(End-Start); - unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace(); - unsigned NumPointerStores = Bytes/TD.getPointerSize(AS); + unsigned NumPointerStores = Bytes/TD.getPointerSize(); // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS); + unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); // If we will reduce the # stores (according to this heuristic), do the // transformation. 
This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index af3a880cb9..d95c855ce7 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -444,7 +444,6 @@ protected: bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) { GEPOffset = Offset; - unsigned int AS = GEPI.getPointerAddressSpace(); for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); GTI != GTE; ++GTI) { ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); @@ -474,7 +473,7 @@ protected: continue; } - APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS)); + APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits()); Index *= APInt(Index.getBitWidth(), TD.getTypeAllocSize(GTI.getIndexedType())); Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset, @@ -2395,9 +2394,7 @@ private: Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); - assert(PointerTy->isPointerTy() && - "Type must be pointer type!"); - APInt Offset(TD.getTypeSizeInBits(PointerTy), BeginOffset - NewAllocaBeginOffset); + APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); } @@ -2490,9 +2487,6 @@ private: assert(OldOp == OldPtr); IRBuilder<> IRB(&LI); - if (VecTy) - return rewriteVectorizedLoadInst(IRB, LI, OldOp); - uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(LI.getType())) { assert(!LI.isVolatile()); @@ -2502,7 +2496,7 @@ private: TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide loads can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && @@ -2524,18 +2518,20 @@ private: // the computed value, and then replace the placeholder with LI, leaving // LI only used for this computation. Value *Placeholder - = IRB.CreateLoad(UndefValue::get(LI.getType()->getPointerTo())); + = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, getName(".insert")); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); - cast<Instruction>(Placeholder)->eraseFromParent(); + delete Placeholder; if (Pass.DeadSplitInsts.insert(&LI)) Pass.DeadInsts.push_back(&LI); DEBUG(dbgs() << " to: " << *V << "\n"); return IsConvertable; } + if (VecTy) + return rewriteVectorizedLoadInst(IRB, LI, OldOp); if (IntTy && LI.getType()->isIntegerTy()) return rewriteIntegerLoad(IRB, LI); @@ -2795,9 +2791,8 @@ private: const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(II); - assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!"); // Compute the relative offset within the transfer. - unsigned IntPtrWidth = TD.getTypeSizeInBits(OldPtr->getType()); + unsigned IntPtrWidth = TD.getPointerSizeInBits(); APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? 
MTO.DestBegin : MTO.SourceBegin)); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index a5446294e3..a46d09c320 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType())); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 84b820b5ce..9f24bb635e 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/TargetTransformInfo.h" using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); @@ -293,7 +294,8 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { +static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, + const TargetTransformInfo *TTI) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -302,7 +304,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TD)) { + if (SimplifyCFG(BBIt++, TD, TTI)) { LocalChange = true; ++NumSimpl; } @@ -317,9 +319,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); + const TargetTransformInfo *TTI = + getAnalysisIfAvailable<TargetTransformInfo>(); bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TD); + EverChanged |= iterativelySimplifyCFG(F, TD, TTI); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -333,7 +337,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { return true; do { - EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged = iterativelySimplifyCFG(F, TD, TTI); EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index f3448bcd87..bacada58c1 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -90,22 +90,6 @@ public: // Helper Functions //===----------------------------------------------------------------------===// -/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. 
-static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) - if (IC->isEquality()) - if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - static bool CallHasFloatingPointArgument(const CallInst *CI) { for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); it != e; ++it) { @@ -135,189 +119,6 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { namespace { //===---------------------------------------===// -// 'stpcpy' Optimizations - -struct StpCpyOpt: public LibCallOptimization { - bool OptChkCall; // True if it's optimizing a __stpcpy_chk libcall. - - StpCpyOpt(bool c) : OptChkCall(c) {} - - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Verify the "stpcpy" function prototype. - unsigned NumParams = OptChkCall ? 3 : 2; - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != NumParams || - FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy()) - return 0; - - // These optimizations require DataLayout. - if (!TD) return 0; - - Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) - Value *StrLen = EmitStrLen(Src, B, TD, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; - } - - // See if we can get the length of the input string. - uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; - - Type *PT = FT->getParamType(0); - Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); - Value *DstEnd = B.CreateGEP(Dst, - ConstantInt::get(TD->getIntPtrType(PT), - Len - 1)); - - // We have enough information to now generate the memcpy call to do the - // copy for us. Make a memcpy to copy the nul byte with align = 1. - if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, - TD, TLI)) - B.CreateMemCpy(Dst, Src, LenV, 1); - return DstEnd; - } -}; - -//===---------------------------------------===// -// 'strncpy' Optimizations - -struct StrNCpyOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - FT->getParamType(0) != B.getInt8PtrTy() || - !FT->getParamType(2)->isIntegerTy()) - return 0; - - Value *Dst = CI->getArgOperand(0); - Value *Src = CI->getArgOperand(1); - Value *LenOp = CI->getArgOperand(2); - - // See if we can get the length of the input string. - uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; - --SrcLen; - - if (SrcLen == 0) { - // strncpy(x, "", y) -> memset(x, '\0', y, 1) - B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); - return Dst; - } - - uint64_t Len; - if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) - Len = LengthArg->getZExtValue(); - else - return 0; - - if (Len == 0) return Dst; // strncpy(x, y, 0) -> x - - // These optimizations require DataLayout. 
- if (!TD) return 0; - - // Let strncpy handle the zero padding - if (Len > SrcLen+1) return 0; - - Type *PT = FT->getParamType(0); - // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(PT), Len), 1); - - return Dst; - } -}; - -//===---------------------------------------===// -// 'strlen' Optimizations - -struct StrLenOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || - FT->getParamType(0) != B.getInt8PtrTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - - Value *Src = CI->getArgOperand(0); - - // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src)) - return ConstantInt::get(CI->getType(), Len-1); - - // strlen(x) != 0 --> *x != 0 - // strlen(x) == 0 --> *x == 0 - if (IsOnlyUsedInZeroEqualityComparison(CI)) - return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); - return 0; - } -}; - - -//===---------------------------------------===// -// 'strpbrk' Optimizations - -struct StrPBrkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || - FT->getParamType(0) != B.getInt8PtrTy() || - FT->getParamType(1) != FT->getParamType(0) || - FT->getReturnType() != FT->getParamType(0)) - return 0; - - StringRef S1, S2; - bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); - bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); - - // strpbrk(s, "") -> NULL - // strpbrk("", s) -> NULL - if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) - return Constant::getNullValue(CI->getType()); - - // Constant folding. - if (HasS1 && HasS2) { - size_t I = S1.find_first_of(S2); - if (I == std::string::npos) // No match. - return Constant::getNullValue(CI->getType()); - - return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); - } - - // strpbrk(s, "a") -> strchr(s, 'a') - if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); - - return 0; - } -}; - -//===---------------------------------------===// -// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. - -struct StrToOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || - !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy()) - return 0; - - Value *EndPtr = CI->getArgOperand(1); - if (isa<ConstantPointerNull>(EndPtr)) { - // With a null EndPtr, this function won't capture the main argument. - // It would be readonly too, except that it still may write to errno. 
- CI->addAttribute(1, Attributes::get(Callee->getContext(), - Attributes::NoCapture)); - } - - return 0; - } -}; - -//===---------------------------------------===// // 'strspn' Optimizations struct StrSpnOpt : public LibCallOptimization { @@ -510,11 +311,10 @@ struct MemCpyOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -533,11 +333,10 @@ struct MemMoveOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) @@ -556,11 +355,10 @@ struct MemSetOpt : public LibCallOptimization { if (!TD) return 0; FunctionType *FT = Callee->getFunctionType(); - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) @@ -985,9 +783,8 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - Type *AT = CI->getArgOperand(0)->getType(); B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(AT), // Copy the + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the FormatStr.size() + 1), 1); // nul byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1114,9 +911,8 @@ struct FPutsOpt : public LibCallOptimization { uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; // Known to have no uses (see above). - Type *PT = FT->getParamType(0); return EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(PT), Len-1), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), CI->getArgOperand(1), B, TD, TLI); } }; @@ -1141,9 +937,8 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Type *AT = CI->getArgOperand(1)->getType(); Value *NewCI = EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(AT), + ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), CI->getArgOperand(0), B, TD, TLI); return NewCI ? 
ConstantInt::get(CI->getType(), FormatStr.size()) : 0; @@ -1242,10 +1037,7 @@ namespace { StringMap<LibCallOptimization*> Optimizations; // String and Memory LibCall Optimizations - StpCpyOpt StpCpy; StpCpyOpt StpCpyChk; - StrNCpyOpt StrNCpy; - StrLenOpt StrLen; StrPBrkOpt StrPBrk; - StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; + StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; @@ -1261,8 +1053,8 @@ namespace { bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(ID), StpCpy(false), StpCpyChk(true), - UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { + SimplifyLibCalls() : FunctionPass(ID), UnaryDoubleFP(false), + UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); @@ -1313,17 +1105,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, /// we know. void SimplifyLibCalls::InitOptimizations() { // String and Memory LibCall Optimizations - Optimizations["strncpy"] = &StrNCpy; - Optimizations["stpcpy"] = &StpCpy; - Optimizations["strlen"] = &StrLen; - Optimizations["strpbrk"] = &StrPBrk; - Optimizations["strtol"] = &StrTo; - Optimizations["strtod"] = &StrTo; - Optimizations["strtof"] = &StrTo; - Optimizations["strtoul"] = &StrTo; - Optimizations["strtoll"] = &StrTo; - Optimizations["strtold"] = &StrTo; - Optimizations["strtoull"] = &StrTo; Optimizations["strspn"] = &StrSpn; Optimizations["strcspn"] = &StrCSpn; Optimizations["strstr"] = &StrStr; @@ -1332,9 +1113,6 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["memmove"] = &MemMove; AddOpt(LibFunc::memset, &MemSet); - // _chk variants of String and Memory LibCall Optimizations. 
- Optimizations["__stpcpy_chk"] = &StpCpyChk; - // Math Library Optimizations Optimizations["cosf"] = &Cos; Optimizations["cos"] = &Cos; diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index bd28f10654..fa2faa2dad 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -46,8 +46,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); + LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), - TD->getIntPtrType(Ptr->getType()), + TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); @@ -72,10 +73,11 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); + LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), - TD->getIntPtrType(Ptr->getType()), + TD->getIntPtrType(Context), B.getInt8PtrTy(), - TD->getIntPtrType(Ptr->getType()), + TD->getIntPtrType(Context), NULL); CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) @@ -124,12 +126,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); + LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Ptr1->getType()), - NULL); + TD->getIntPtrType(Context), NULL); CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "strncmp"); @@ -199,14 +201,14 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, AttributeWithIndex AWI; AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Dst->getType()), - TD->getIntPtrType(Src->getType()), - NULL); + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), NULL); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize); @@ -228,11 +230,12 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind }; AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); + LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), - TD->getIntPtrType(Ptr->getType()), + TD->getIntPtrType(Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -257,12 +260,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, ArrayRef<Attributes::AttrVal>(AVs, 2)); + 
LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), - TD->getIntPtrType(Ptr1->getType()), - NULL); + TD->getIntPtrType(Context), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -422,24 +425,24 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture); AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex, Attributes::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; - Type *PtrTy = Ptr->getType(); if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), - TD->getIntPtrType(PtrTy), + TD->getIntPtrType(Context), B.getInt8PtrTy(), - TD->getIntPtrType(PtrTy), - TD->getIntPtrType(PtrTy), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), File->getType(), NULL); else - F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(PtrTy), + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), B.getInt8PtrTy(), - TD->getIntPtrType(PtrTy), - TD->getIntPtrType(PtrTy), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(PtrTy), 1), File); + ConstantInt::get(TD->getIntPtrType(Context), 1), File); if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -461,13 +464,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, IRBuilder<> B(CI); if (Name == "__memcpy_chk") { - Type *PT = FT->getParamType(0); // Check if this has the right signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT) || - FT->getParamType(3) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return false; if (isFoldable(3, 2, false)) { @@ -486,12 +488,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__memmove_chk") { // Check if this has the right signature. - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT) || - FT->getParamType(3) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return false; if (isFoldable(3, 2, false)) { @@ -505,12 +506,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__memset_chk") { // Check if this has the right signature. 
- Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(PT) || - FT->getParamType(3) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return false; if (isFoldable(3, 2, false)) { @@ -525,12 +525,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") { // Check if this has the right signature. - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(PT)) + FT->getParamType(2) != TD->getIntPtrType(Context)) return 0; @@ -552,12 +551,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, if (Name == "__strncpy_chk" || Name == "__stpncpy_chk") { // Check if this has the right signature. - Type *PT = FT->getParamType(0); if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(PT)) + FT->getParamType(3) != TD->getIntPtrType(Context)) return false; if (isFoldable(3, 2, false)) { diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b654111eba..5e05c83c35 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -53,6 +53,8 @@ namespace { // Cached analysis information for the current function. DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; Loop *L; @@ -117,6 +119,8 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { L = TheLoop; DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); // Get the set of exiting blocks. SmallVector<BasicBlock*, 8> ExitBlocks; @@ -156,6 +160,12 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { MadeChange |= ProcessInstruction(I, ExitBlocks); } } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && MadeChange) + SE->forgetLoop(L); assert(L->isLCSSAForm(*DT)); PredCache.clear(); @@ -245,7 +255,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } - + // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { @@ -260,6 +270,9 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. 
+ if (UsesToRewrite[i]->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index c09d982d65..a954d82c05 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -806,7 +806,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DataLayout *TD) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : 64; + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD); unsigned TrailZ = KnownZero.countTrailingOnes(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 0bc185d8b7..9d9e201665 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -46,6 +46,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" @@ -89,6 +90,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -194,6 +196,11 @@ ReprocessLoop: BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + Changed = true; } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 870e2b2ade..9823433e86 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DataLayout.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/IRBuilder.h" @@ -39,7 +40,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> @@ -82,6 +83,7 @@ namespace { class SimplifyCFGOpt { const DataLayout *const TD; + const TargetTransformInfo *const TTI; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -101,7 +103,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - explicit SimplifyCFGOpt(const DataLayout *td) : TD(td) {} + SimplifyCFGOpt(const DataLayout *td, const TargetTransformInfo *tti) + : TD(td), TTI(tti) {} bool run(BasicBlock *BB); }; } @@ -392,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = TD->getIntPtrType(V->getType()); + IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). 
if (isa<ConstantPointerNull>(V)) @@ -532,13 +535,9 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV) { - PtrToIntInst *PTII = NULL; - if ((PTII = dyn_cast<PtrToIntInst>(CV)) && - CV->getType() == TD->getIntPtrType(CV->getContext(), - PTII->getPointerAddressSpace())) + if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) + if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) CV = PTII->getOperand(0); - } return CV; } @@ -985,7 +984,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), "magicptr"); } @@ -2713,7 +2712,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getType()), + TD->getIntPtrType(CompVal->getContext()), "magicptr"); } @@ -3197,26 +3196,95 @@ static bool ValidLookupTableConstant(Constant *C) { isa<UndefValue>(C); } -/// GetCaseResulsts - Try to determine the resulting constant values in phi -/// nodes at the common destination basic block for one of the case -/// destinations of a switch instruction. +/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant *LookupConstant(Value *V, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// ConstantFold - Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. 
+static Constant *ConstantFold(Instruction *I, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::get(BO->getOpcode(), A, B); + } + + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(I->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); + } + + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return 0; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return 0; + } + + if (CastInst *Cast = dyn_cast<CastInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); + } + + return 0; +} + +/// GetCaseResults - Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destinations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. static bool GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); - // If CaseDest is empty, continue to its successor. - if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && - !isa<PHINode>(CaseDest->begin())) { - - TerminatorInst *Terminator = CaseDest->getTerminator(); - if (Terminator->getNumSuccessors() != 1) - return false; - - Pred = CaseDest; - CaseDest = Terminator->getSuccessor(0); + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value*, Constant*> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa<DbgInfoIntrinsic>(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(I, ConstantPool)) { + // Instruction is side-effect free and constant. + ConstantPool.insert(std::make_pair(I, C)); + } else { + break; + } } // If we did not have a CommonDest before, use the current one. @@ -3233,10 +3301,17 @@ static bool GetCaseResults(SwitchInst *SI, if (Idx == -1) continue; - Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx)); + Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), + ConstantPool); if (!ConstVal) return false; + // Note: If the constant comes from constant-propagating the case value + // through the CaseDest basic block, it will be safe to remove the + // instructions in that block.
They cannot be used (except in the phi nodes + // we visit) outside CaseDest, because that block does not dominate its + // successor. If it did, we would not be in this phi node. + // Be conservative about which kinds of constants we support. if (!ValidLookupTableConstant(ConstVal)) return false; @@ -3326,7 +3401,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) - SingleValue = NULL; + SingleValue = 0; } // Fill in any holes in the table with the default result. @@ -3337,7 +3412,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } if (DefaultValue != SingleValue) - SingleValue = NULL; + SingleValue = 0; } // If each element in the table contains the same value, we only need to store @@ -3459,9 +3534,12 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, - const DataLayout* TD) { + const DataLayout* TD, + const TargetTransformInfo *TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - // FIXME: Handle unreachable cases. + + if (TTI && !TTI->getScalarTargetTransformInfo()->shouldBuildLookupTables()) + return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. @@ -3484,7 +3562,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = NULL; + BasicBlock *CommonDest = 0; typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; SmallDenseMap<PHINode*, ResultListTy> ResultLists; SmallDenseMap<PHINode*, Constant*> DefaultResults; @@ -3501,7 +3579,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Resulting value at phi nodes for this case value. typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; - if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results)) return false; // Append the result from this case to the list for each phi. @@ -3514,7 +3593,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Get the resulting values for the default case. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList)) return false; for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; @@ -3583,32 +3663,30 @@ static bool SwitchToLookupTable(SwitchInst *SI, } bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - // If this switch is too complex to want to look at, ignore it. - if (!isValueEqualityComparison(SI)) - return false; - BasicBlock *BB = SI->getParent(); - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. 
+ if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB) | true; - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB) | true; + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; - // If the block only contains the switch, see if we can fold the block - // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(BBI)) - ++BBI; - if (SI == &*BBI) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB) | true; + } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) @@ -3621,7 +3699,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder, TD)) + if (SwitchToLookupTable(SI, Builder, TD, TTI)) return SimplifyCFG(BB) | true; return false; @@ -3918,6 +3996,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD) { - return SimplifyCFGOpt(TD).run(BB); +bool llvm::SimplifyCFG(BasicBlock *BB, const DataLayout *TD, + const TargetTransformInfo *TTI) { + return SimplifyCFGOpt(TD, TTI).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 162b29e829..581b8d3ea2 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -64,6 +64,26 @@ public: }; //===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool isOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -102,13 +122,14 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); // Check if this has the right signature. 
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || - FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1))) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return 0; if (isFoldable(3, 2, false)) { @@ -124,13 +145,14 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); // Check if this has the right signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || - FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(1))) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return 0; if (isFoldable(3, 2, false)) { @@ -146,13 +168,14 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); // Check if this has the right signature. if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0)) || - FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0))) + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) return 0; if (isFoldable(3, 2, false)) { @@ -177,7 +200,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || - FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + FT->getParamType(2) != TD->getIntPtrType(Context)) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); @@ -185,8 +208,8 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { return Src; // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our - // st[rp]cpy_chk call which may fail at runtime if the size is too long. + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
if (isFoldable(2, 1, true)) { @@ -202,14 +225,64 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { Value *Ret = EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(Dst->getType()), - Len), CI->getArgOperand(2), B, TD, TLI); + ConstantInt::get(TD->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, TD, TLI); return Ret; } return 0; } }; +struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain stpcpy. Otherwise we'll keep our + // stpcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can still fold __stpcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization requires DataLayout. + if (!TD) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) + return 0; + return DstEnd; + } + return 0; + } +}; + struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { this->CI = CI; @@ -222,7 +295,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || - FT->getParamType(3) != TD->getIntPtrType(FT->getParamType(0))) + FT->getParamType(3) != TD->getIntPtrType(Context)) return 0; if (isFoldable(3, 2, false)) { @@ -284,8 +357,7 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(Src->getType()), - Len + 1), 1); + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); return Dst; } }; @@ -357,9 +429,8 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; - Type *PT = FT->getParamType(0); return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(TD->getIntPtrType(PT), Len), + ConstantInt::get(TD->getIntPtrType(*Context), Len), B, TD, TLI); } @@ -453,9 +524,8 @@ struct StrCmpOpt : public LibCallOptimization { // These optimizations require DataLayout. if (!TD) return 0; - Type *PT = FT->getParamType(0); return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(PT), + ConstantInt::get(TD->getIntPtrType(*Context), std::min(Len1, Len2)), B, TD, TLI); } @@ -537,11 +607,171 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(Dst->getType()), Len), 1); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + +struct StpCpyOpt: public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "stpcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; + } +}; + +struct StrNCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return 0; + + if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + + // These optimizations require DataLayout. 
+ if (!TD) return 0; + + // Let strncpy handle the zero padding + if (Len > SrcLen+1) return 0; + + Type *PT = FT->getParamType(0); + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + return Dst; } }; +struct StrLenOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len-1); + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (isOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; + } +}; + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == std::string::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + + return 0; + } +}; + +struct StrToOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) + return 0; + + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addAttribute(1, Attributes::get(Callee->getContext(), + Attributes::NoCapture)); + } + + return 0; + } +}; + } // End anonymous namespace. namespace llvm { @@ -556,6 +786,7 @@ class LibCallSimplifierImpl { MemMoveChkOpt MemMoveChk; MemSetChkOpt MemSetChk; StrCpyChkOpt StrCpyChk; + StpCpyChkOpt StpCpyChk; StrNCpyChkOpt StrNCpyChk; // String and memory library call optimizations. 
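
The effect of the new StrLenOpt above is easiest to see at the source level. A minimal sketch of the rewrite it performs (the caller code here is invented for illustration; only the transformation itself comes from the patch):

#include <cstring>

// Before: the result of strlen is only compared against zero, yet the
// call still scans the whole string.
bool isEmpty(const char *S) {
  return std::strlen(S) == 0;
}

// After StrLenOpt: strlen(x) == 0 is rewritten to *x == 0, so only the
// first byte is loaded. isOnlyUsedInZeroEqualityComparison guards this:
// every use of the strlen result must be an equality compare with zero.
bool isEmptyFast(const char *S) {
  return *S == '\0';
}

A constant argument is instead folded outright: strlen("xyz") becomes the constant 3.
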
@@ -566,6 +797,11 @@ class LibCallSimplifierImpl { StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; + StpCpyOpt StpCpy; + StrNCpyOpt StrNCpy; + StrLenOpt StrLen; + StrPBrkOpt StrPBrk; + StrToOpt StrTo; void initOptimizations(); public: @@ -583,7 +819,7 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["__memmove_chk"] = &MemMoveChk; Optimizations["__memset_chk"] = &MemSetChk; Optimizations["__strcpy_chk"] = &StrCpyChk; - Optimizations["__stpcpy_chk"] = &StrCpyChk; + Optimizations["__stpcpy_chk"] = &StpCpyChk; Optimizations["__strncpy_chk"] = &StrNCpyChk; Optimizations["__stpncpy_chk"] = &StrNCpyChk; @@ -595,6 +831,17 @@ void LibCallSimplifierImpl::initOptimizations() { Optimizations["strcmp"] = &StrCmp; Optimizations["strncmp"] = &StrNCmp; Optimizations["strcpy"] = &StrCpy; + Optimizations["stpcpy"] = &StpCpy; + Optimizations["strncpy"] = &StrNCpy; + Optimizations["strlen"] = &StrLen; + Optimizations["strpbrk"] = &StrPBrk; + Optimizations["strtol"] = &StrTo; + Optimizations["strtod"] = &StrTo; + Optimizations["strtof"] = &StrTo; + Optimizations["strtoul"] = &StrTo; + Optimizations["strtoll"] = &StrTo; + Optimizations["strtold"] = &StrTo; + Optimizations["strtoull"] = &StrTo; } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 81125f22a6..4653a7d7c8 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -43,16 +43,26 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" #include "llvm/DataLayout.h" +#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" #include <algorithm> #include <map> using namespace llvm; +static cl::opt<bool> +IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), + cl::Hidden, cl::desc("Ignore target information")); + static cl::opt<unsigned> ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden, cl::desc("The required chain depth for vectorization")); +static cl::opt<bool> +UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false), + cl::Hidden, cl::desc("Use the chain depth requirement with" + " target information")); + static cl::opt<unsigned> SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden, cl::desc("The maximum search distance for instruction pairs")); @@ -94,8 +104,9 @@ static cl::opt<bool> NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values")); +// FIXME: This should default to false once pointer vector support works. 
static cl::opt<bool> -NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden, +NoPointers("bb-vectorize-no-pointers", cl::init(/*false*/ true), cl::Hidden, cl::desc("Don't try to vectorize pointer values")); static cl::opt<bool> @@ -160,6 +171,12 @@ DebugCycleCheck("bb-vectorize-debug-cycle-check", cl::init(false), cl::Hidden, cl::desc("When debugging is enabled, output information on the" " cycle-checking process")); + +static cl::opt<bool> +PrintAfterEveryPair("bb-vectorize-debug-print-after-every-pair", + cl::init(false), cl::Hidden, + cl::desc("When debugging is enabled, dump the basic block after" + " every pair is fused")); #endif STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); @@ -181,11 +198,16 @@ namespace { DT = &P->getAnalysis<DominatorTree>(); SE = &P->getAnalysis<ScalarEvolution>(); TD = P->getAnalysisIfAvailable<DataLayout>(); + TTI = IgnoreTargetInfo ? 0 : + P->getAnalysisIfAvailable<TargetTransformInfo>(); + VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0; } typedef std::pair<Value *, Value *> ValuePair; + typedef std::pair<ValuePair, int> ValuePairWithCost; typedef std::pair<ValuePair, size_t> ValuePairWithDepth; typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair + typedef std::pair<VPPair, unsigned> VPPairWithType; typedef std::pair<std::multimap<Value *, Value *>::iterator, std::multimap<Value *, Value *>::iterator> VPIteratorPair; typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, @@ -196,6 +218,8 @@ namespace { DominatorTree *DT; ScalarEvolution *SE; DataLayout *TD; + TargetTransformInfo *TTI; + const VectorTargetTransformInfo *VTTI; // FIXME: const correct? @@ -204,11 +228,23 @@ namespace { bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap<Value *, Value *> &CandidatePairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len); + // FIXME: The current implementation does not account for pairs that + // are connected in multiple ways. 
For example: + // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap) + enum PairConnectionType { + PairConnectionDirect, + PairConnectionSwap, + PairConnectionSplat + }; + void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs); + std::multimap<ValuePair, ValuePair> &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes); void buildDepMap(BasicBlock &BB, std::multimap<Value *, Value *> &CandidatePairs, @@ -216,19 +252,29 @@ namespace { DenseSet<ValuePair> &PairableInstUsers); void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, DenseSet<ValuePair> &PairableInstUsers, DenseMap<Value *, Value *>& ChosenPairs); void fuseChosenPairs(BasicBlock &BB, std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *>& ChosenPairs); + DenseMap<Value *, Value *>& ChosenPairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps); + bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore, bool NonPow2Len); + bool IsSimpleLoadStore, bool NonPow2Len, + int &CostSavings, int &FixedOrder); bool trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, @@ -239,6 +285,7 @@ namespace { std::multimap<Value *, Value *> &CandidatePairs, std::vector<Value *> &PairableInsts, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, ValuePair P); bool pairsConflict(ValuePair P, ValuePair Q, @@ -270,17 +317,21 @@ namespace { void findBestTreeFor( std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, DenseSet<ValuePair> &PairableInstUsers, std::multimap<ValuePair, ValuePair> &PairableInstUserMap, DenseMap<Value *, Value *> &ChosenPairs, DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, - size_t &BestEffSize, VPIteratorPair ChoiceRange, + int &BestEffSize, VPIteratorPair ChoiceRange, bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool FlipMemInputs); + Instruction *J, unsigned o); void fillNewShuffleMask(LLVMContext& Context, Instruction *J, unsigned MaskOffset, unsigned NumInElem, @@ -292,20 +343,20 @@ namespace { bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, Value *&LOp, unsigned numElemL, - Type *ArgTypeL, Type *ArgTypeR, + Type *ArgTypeL, Type *ArgTypeR, bool IBeforeJ, unsigned IdxOff = 0); Value *getReplacementInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool FlipMemInputs); + Instruction *J, unsigned o, bool IBeforeJ); void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector<Value *, 3> 
&ReplacedOperands, - bool FlipMemInputs); + bool IBeforeJ); void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, - Instruction *&K2, bool FlipMemInputs); + Instruction *&K2); void collectPairLoadMoveSet(BasicBlock &BB, DenseMap<Value *, Value *> &ChosenPairs, @@ -317,10 +368,6 @@ namespace { DenseMap<Value *, Value *> &ChosenPairs, std::multimap<Value *, Value *> &LoadMoveSet); - void collectPtrInfo(std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<Value *> &LowPtrInsts); - bool canMoveUsesOfIAfterJ(BasicBlock &BB, std::multimap<Value *, Value *> &LoadMoveSet, Instruction *I, Instruction *J); @@ -339,13 +386,16 @@ namespace { return false; } + DEBUG(if (VTTI) dbgs() << "BBV: using target information\n"); + bool changed = false; // Iterate a sufficient number of times to merge types of size 1 bit, // then 2 bits, then 4, etc. up to half of the target vector width of the // target vector register. unsigned n = 1; for (unsigned v = 2; - v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); + (VTTI || v <= Config.VectorBits) && + (!Config.MaxIter || n <= Config.MaxIter); v *= 2, ++n) { DEBUG(dbgs() << "BBV: fusing loop #" << n << " for " << BB.getName() << " in " << @@ -375,6 +425,9 @@ namespace { DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); TD = getAnalysisIfAvailable<DataLayout>(); + TTI = IgnoreTargetInfo ? 0 : + getAnalysisIfAvailable<TargetTransformInfo>(); + VTTI = TTI ? TTI->getVectorTargetTransformInfo() : 0; return vectorizeBB(BB); } @@ -427,6 +480,10 @@ namespace { T2 = cast<CastInst>(I)->getSrcTy(); else T2 = T1; + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + T2 = SI->getCondition()->getType(); + } } // Returns the weight associated with the provided value. A chain of @@ -458,6 +515,62 @@ namespace { return 1; } + // Returns the cost of the provided instruction using VTTI. + // This does not handle loads and stores. + unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2) { + switch (Opcode) { + default: break; + case Instruction::GetElementPtr: + // We mark this instruction as zero-cost because scalar GEPs are usually + // lowered to the instruction addressing mode. At the moment we don't + // generate vector GEPs.
+ return 0; + case Instruction::Br: + return VTTI->getCFInstrCost(Opcode); + case Instruction::PHI: + return 0; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + return VTTI->getArithmeticInstrCost(Opcode, T1); + case Instruction::Select: + case Instruction::ICmp: + case Instruction::FCmp: + return VTTI->getCmpSelInstrCost(Opcode, T1, T2); + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::ShuffleVector: + return VTTI->getCastInstrCost(Opcode, T1, T2); + } + + return 1; + } + // This determines the relative offset of two loads or stores, returning // true if the offset could be determined to be some constant value. // For example, if OffsetInElmts == 1, then J accesses the memory directly @@ -465,20 +578,30 @@ namespace { // directly after J. bool getPairPtrInfo(Instruction *I, Instruction *J, Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, - int64_t &OffsetInElmts) { + unsigned &IAddressSpace, unsigned &JAddressSpace, + int64_t &OffsetInElmts, bool ComputeOffset = true) { OffsetInElmts = 0; - if (isa<LoadInst>(I)) { - IPtr = cast<LoadInst>(I)->getPointerOperand(); - JPtr = cast<LoadInst>(J)->getPointerOperand(); - IAlignment = cast<LoadInst>(I)->getAlignment(); - JAlignment = cast<LoadInst>(J)->getAlignment(); + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + LoadInst *LJ = cast<LoadInst>(J); + IPtr = LI->getPointerOperand(); + JPtr = LJ->getPointerOperand(); + IAlignment = LI->getAlignment(); + JAlignment = LJ->getAlignment(); + IAddressSpace = LI->getPointerAddressSpace(); + JAddressSpace = LJ->getPointerAddressSpace(); } else { - IPtr = cast<StoreInst>(I)->getPointerOperand(); - JPtr = cast<StoreInst>(J)->getPointerOperand(); - IAlignment = cast<StoreInst>(I)->getAlignment(); - JAlignment = cast<StoreInst>(J)->getAlignment(); + StoreInst *SI = cast<StoreInst>(I), *SJ = cast<StoreInst>(J); + IPtr = SI->getPointerOperand(); + JPtr = SJ->getPointerOperand(); + IAlignment = SI->getAlignment(); + JAlignment = SJ->getAlignment(); + IAddressSpace = SI->getPointerAddressSpace(); + JAddressSpace = SJ->getPointerAddressSpace(); } + if (!ComputeOffset) + return true; + const SCEV *IPtrSCEV = SE->getSCEV(IPtr); const SCEV *JPtrSCEV = SE->getSCEV(JPtr); @@ -558,11 +681,18 @@ namespace { std::vector<Value *> AllPairableInsts; DenseMap<Value *, Value *> AllChosenPairs; + DenseSet<ValuePair> AllFixedOrderPairs; + DenseMap<VPPair, unsigned> AllPairConnectionTypes; + std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps; do { std::vector<Value *> PairableInsts; std::multimap<Value *, Value *> CandidatePairs; + DenseSet<ValuePair> FixedOrderPairs; + DenseMap<ValuePair, int> CandidatePairCostSavings; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, + FixedOrderPairs, + CandidatePairCostSavings, PairableInsts, NonPow2Len); if 
(PairableInsts.empty()) continue; @@ -575,10 +705,18 @@ namespace { // Note that it only matters that both members of the second pair use some // element of the first pair (to allow for splatting). - std::multimap<ValuePair, ValuePair> ConnectedPairs; - computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs); + std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps; + DenseMap<VPPair, unsigned> PairConnectionTypes; + computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs, + PairConnectionTypes); if (ConnectedPairs.empty()) continue; + for (std::multimap<ValuePair, ValuePair>::iterator + I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); + I != IE; ++I) { + ConnectedPairDeps.insert(VPPair(I->second, I->first)); + } + // Build the pairable-instruction dependency map DenseSet<ValuePair> PairableInstUsers; buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); @@ -590,13 +728,48 @@ namespace { // variables. DenseMap<Value *, Value *> ChosenPairs; - choosePairs(CandidatePairs, PairableInsts, ConnectedPairs, + choosePairs(CandidatePairs, CandidatePairCostSavings, + PairableInsts, FixedOrderPairs, PairConnectionTypes, + ConnectedPairs, ConnectedPairDeps, PairableInstUsers, ChosenPairs); if (ChosenPairs.empty()) continue; AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), PairableInsts.end()); AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); + + // Only for the chosen pairs, propagate information on fixed-order pairs, + // pair connections, and their types to the data structures used by the + // pair fusion procedures. + for (DenseMap<Value *, Value *>::iterator I = ChosenPairs.begin(), + IE = ChosenPairs.end(); I != IE; ++I) { + if (FixedOrderPairs.count(*I)) + AllFixedOrderPairs.insert(*I); + else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) + AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); + + for (DenseMap<Value *, Value *>::iterator J = ChosenPairs.begin(); + J != IE; ++J) { + DenseMap<VPPair, unsigned>::iterator K = + PairConnectionTypes.find(VPPair(*I, *J)); + if (K != PairConnectionTypes.end()) { + AllPairConnectionTypes.insert(*K); + } else { + K = PairConnectionTypes.find(VPPair(*J, *I)); + if (K != PairConnectionTypes.end()) + AllPairConnectionTypes.insert(*K); + } + } + } + + for (std::multimap<ValuePair, ValuePair>::iterator + I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); + I != IE; ++I) { + if (AllPairConnectionTypes.count(*I)) { + AllConnectedPairs.insert(*I); + AllConnectedPairDeps.insert(VPPair(I->second, I->first)); + } + } } while (ShouldContinue); if (AllChosenPairs.empty()) return false; @@ -609,7 +782,9 @@ namespace { // replaced with a vector_extract on the result. Subsequent optimization // passes should coalesce the build/extract combinations. - fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); + fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs, + AllPairConnectionTypes, + AllConnectedPairs, AllConnectedPairDeps); // It is important to cleanup here so that future iterations of this // function have less work to do. 
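
The driver loop above now threads cost information through the whole pairing pipeline. The core profitability rule from areInstsCompatible can be summarized in a small standalone sketch (the struct and function names are invented for illustration; the rule itself mirrors the patch):

// Scalar costs of the two candidate instructions and the cost of the
// fused vector instruction, as VTTI would report them.
struct PairCost {
  unsigned ICost;
  unsigned JCost;
  unsigned VCost;
};

// Returns the savings recorded in CandidatePairCostSavings, or -1 when
// the pair must be rejected: fusing is never allowed to cost more than
// executing the two scalar instructions separately.
int fusionSavings(const PairCost &C) {
  if (C.VCost > C.ICost + C.JCost)
    return -1;
  return static_cast<int>(C.ICost + C.JCost) - static_cast<int>(C.VCost);
}
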
@@ -679,15 +854,22 @@ namespace { !(VectorType::isValidElementType(T2) || T2->isVectorTy())) return false; - if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) { + if (T1->getScalarSizeInBits() == 1) { if (!Config.VectorizeBools) return false; } else { - if (!Config.VectorizeInts - && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + if (!Config.VectorizeInts && T1->isIntOrIntVectorTy()) return false; } - + + if (T2->getScalarSizeInBits() == 1) { + if (!Config.VectorizeBools) + return false; + } else { + if (!Config.VectorizeInts && T2->isIntOrIntVectorTy()) + return false; + } + if (!Config.VectorizeFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) return false; @@ -703,8 +885,8 @@ namespace { T2->getScalarType()->isPointerTy())) return false; - if (T1->getPrimitiveSizeInBits() >= Config.VectorBits || - T2->getPrimitiveSizeInBits() >= Config.VectorBits) + if (!VTTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits || + T2->getPrimitiveSizeInBits() >= Config.VectorBits)) return false; return true; @@ -715,10 +897,14 @@ namespace { // that I has already been determined to be vectorizable and that J is not // in the use tree of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore, bool NonPow2Len) { + bool IsSimpleLoadStore, bool NonPow2Len, + int &CostSavings, int &FixedOrder) { DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << " <-> " << *J << "\n"); + CostSavings = 0; + FixedOrder = 0; + // Loads and stores can be merged if they have different alignments, // but are otherwise the same. if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | @@ -731,38 +917,83 @@ namespace { unsigned MaxTypeBits = std::max( IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); - if (MaxTypeBits > Config.VectorBits) + if (!VTTI && MaxTypeBits > Config.VectorBits) return false; // FIXME: handle addsub-type operations! if (IsSimpleLoadStore) { Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment; + unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; int64_t OffsetInElmts = 0; if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, + IAddressSpace, JAddressSpace, OffsetInElmts) && abs64(OffsetInElmts) == 1) { - if (Config.AlignedOnly) { - Type *aTypeI = isa<StoreInst>(I) ? - cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); - Type *aTypeJ = isa<StoreInst>(J) ? - cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); + FixedOrder = (int) OffsetInElmts; + unsigned BottomAlignment = IAlignment; + if (OffsetInElmts < 0) BottomAlignment = JAlignment; + Type *aTypeI = isa<StoreInst>(I) ? + cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); + Type *aTypeJ = isa<StoreInst>(J) ? + cast<StoreInst>(J)->getValueOperand()->getType() : J->getType(); + Type *VType = getVecTypeForPair(aTypeI, aTypeJ); + + if (Config.AlignedOnly) { // An aligned load or store is possible only if the instruction // with the lower offset has an alignment suitable for the // vector type. 
- unsigned BottomAlignment = IAlignment; - if (OffsetInElmts < 0) BottomAlignment = JAlignment; - - Type *VType = getVecTypeForPair(aTypeI, aTypeJ); unsigned VecAlignment = TD->getPrefTypeAlignment(VType); if (BottomAlignment < VecAlignment) return false; } + + if (VTTI) { + unsigned ICost = VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + IAlignment, IAddressSpace); + unsigned JCost = VTTI->getMemoryOpCost(J->getOpcode(), J->getType(), + JAlignment, JAddressSpace); + unsigned VCost = VTTI->getMemoryOpCost(I->getOpcode(), VType, + BottomAlignment, + IAddressSpace); + if (VCost > ICost + JCost) + return false; + + // We don't want to fuse to a type that will be split, even + // if the two input types will also be split and there is no other + // associated cost. + unsigned VParts = VTTI->getNumberOfParts(VType); + if (VParts > 1) + return false; + else if (!VParts && VCost == ICost + JCost) + return false; + + CostSavings = ICost + JCost - VCost; + } } else { return false; } + } else if (VTTI) { + unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2); + unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); + Type *VT1 = getVecTypeForPair(IT1, JT1), + *VT2 = getVecTypeForPair(IT2, JT2); + unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); + + if (VCost > ICost + JCost) + return false; + + // We don't want to fuse to a type that will be split, even + // if the two input types will also be split and there is no other + // associated cost. + unsigned VParts = VTTI->getNumberOfParts(VT1); + if (VParts > 1) + return false; + else if (!VParts && VCost == ICost + JCost) + return false; + + CostSavings = ICost + JCost - VCost; } // The powi intrinsic is special because only the first argument is @@ -845,6 +1076,8 @@ namespace { bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap<Value *, Value *> &CandidatePairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len) { BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -881,7 +1114,9 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. - if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue; + int CostSavings, FixedOrder; + if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, + CostSavings, FixedOrder)) continue; // J is a candidate for merging with I. if (!PairableInsts.size() || @@ -890,6 +1125,14 @@ namespace { } CandidatePairs.insert(ValuePair(I, J)); + if (VTTI) + CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), + CostSavings)); + + if (FixedOrder == 1) + FixedOrderPairs.insert(ValuePair(I, J)); + else if (FixedOrder == -1) + FixedOrderPairs.insert(ValuePair(J, I)); // The next call to this function must start after the last instruction // selected during this invocation. 
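
The FixedOrder bookkeeping just added records which operand order a fused memory pair must keep. A small sketch of the classification (the enum and function are invented; the +1/-1 convention is the patch's own, where OffsetInElmts == 1 means J accesses memory directly after I):

enum class PairOrder { IThenJ, JThenI, NotAdjacent };

// Mirrors getCandidatePairs: a load/store pair is only a candidate when
// the two accesses are adjacent, and the sign of the offset fixes the
// order of the fused wide access.
PairOrder classifyPair(int64_t OffsetInElmts) {
  if (OffsetInElmts == 1)
    return PairOrder::IThenJ;      // FixedOrderPairs gets (I, J)
  if (OffsetInElmts == -1)
    return PairOrder::JThenI;      // FixedOrderPairs gets (J, I)
  return PairOrder::NotAdjacent;   // abs64(OffsetInElmts) != 1: rejected
}
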
@@ -899,7 +1142,8 @@ namespace { } DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair " - << *I << " <-> " << *J << "\n"); + << *I << " <-> " << *J << " (cost savings: " << + CostSavings << ")\n"); // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected @@ -927,6 +1171,7 @@ namespace { std::multimap<Value *, Value *> &CandidatePairs, std::vector<Value *> &PairableInsts, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, ValuePair P) { StoreInst *SI, *SJ; @@ -958,12 +1203,18 @@ namespace { VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); // Look for <I, J>: - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) - ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + VPPair VP(P, ValuePair(*I, *J)); + ConnectedPairs.insert(VP); + PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); + } // Look for <J, I>: - if (isSecondInIteratorPair<Value*>(*I, JPairRange)) - ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I))); + if (isSecondInIteratorPair<Value*>(*I, JPairRange)) { + VPPair VP(P, ValuePair(*J, *I)); + ConnectedPairs.insert(VP); + PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); + } } if (Config.SplatBreaksChain) continue; @@ -974,8 +1225,11 @@ namespace { P.first == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) - ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + VPPair VP(P, ValuePair(*I, *J)); + ConnectedPairs.insert(VP); + PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); + } } } @@ -997,8 +1251,11 @@ namespace { P.second == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) - ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + VPPair VP(P, ValuePair(*I, *J)); + ConnectedPairs.insert(VP); + PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); + } } } } @@ -1009,7 +1266,8 @@ namespace { void BBVectorize::computeConnectedPairs( std::multimap<Value *, Value *> &CandidatePairs, std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs) { + std::multimap<ValuePair, ValuePair> &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes) { for (std::vector<Value *>::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { @@ -1018,7 +1276,7 @@ namespace { for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; P != choiceRange.second; ++P) computePairsConnectedTo(CandidatePairs, PairableInsts, - ConnectedPairs, *P); + ConnectedPairs, PairConnectionTypes, *P); } DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() @@ -1353,13 +1611,17 @@ namespace { // pairs, given the choice of root pairs as an iterator range. 
void BBVectorize::findBestTreeFor( std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, DenseSet<ValuePair> &PairableInstUsers, std::multimap<ValuePair, ValuePair> &PairableInstUserMap, DenseMap<Value *, Value *> &ChosenPairs, DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, - size_t &BestEffSize, VPIteratorPair ChoiceRange, + int &BestEffSize, VPIteratorPair ChoiceRange, bool UseCycleCheck) { for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; J != ChoiceRange.second; ++J) { @@ -1409,17 +1671,243 @@ namespace { PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, PrunedTree, *J, UseCycleCheck); - size_t EffSize = 0; - for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) - EffSize += getDepthFactor(S->first); + int EffSize = 0; + if (VTTI) { + DenseSet<Value *> PrunedTreeInstrs; + for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), + E = PrunedTree.end(); S != E; ++S) { + PrunedTreeInstrs.insert(S->first); + PrunedTreeInstrs.insert(S->second); + } + + // The set of pairs that have already contributed to the total cost. + DenseSet<ValuePair> IncomingPairs; + + // The node weights represent the cost savings associated with + // fusing the pair of instructions. + for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), + E = PrunedTree.end(); S != E; ++S) { + bool FlipOrder = false; + + if (getDepthFactor(S->first)) { + int ESContrib = CandidatePairCostSavings.find(*S)->second; + DEBUG(if (DebugPairSelection) dbgs() << "\tweight {" + << *S->first << " <-> " << *S->second << "} = " << + ESContrib << "\n"); + EffSize += ESContrib; + } + + // The edge weights contribute in a negative sense: they represent + // the cost of shuffles. + VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S); + if (IP.first != ConnectedPairDeps.end()) { + unsigned NumDepsDirect = 0, NumDepsSwap = 0; + for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; + Q != IP.second; ++Q) { + if (!PrunedTree.count(Q->second)) + continue; + DenseMap<VPPair, unsigned>::iterator R = + PairConnectionTypes.find(VPPair(Q->second, Q->first)); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + if (R->second == PairConnectionDirect) + ++NumDepsDirect; + else if (R->second == PairConnectionSwap) + ++NumDepsSwap; + } + + // If there are more swaps than direct connections, then + // the pair order will be flipped during fusion. So the real + // number of swaps is the minimum number. 
+ FlipOrder = !FixedOrderPairs.count(*S) && + ((NumDepsSwap > NumDepsDirect) || + FixedOrderPairs.count(ValuePair(S->second, S->first))); + + for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; + Q != IP.second; ++Q) { + if (!PrunedTree.count(Q->second)) + continue; + DenseMap<VPPair, unsigned>::iterator R = + PairConnectionTypes.find(VPPair(Q->second, Q->first)); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + Type *Ty1 = Q->second.first->getType(), + *Ty2 = Q->second.second->getType(); + Type *VTy = getVecTypeForPair(Ty1, Ty2); + if ((R->second == PairConnectionDirect && FlipOrder) || + (R->second == PairConnectionSwap && !FlipOrder) || + R->second == PairConnectionSplat) { + int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, + VTy, VTy); + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << + *Q->second.first << " <-> " << *Q->second.second << + "} -> {" << + *S->first << " <-> " << *S->second << "} = " << + ESContrib << "\n"); + EffSize -= ESContrib; + } + } + } + + // Compute the cost of outgoing edges. We assume that edges outgoing + // to shuffles, inserts or extracts can be merged, and so contribute + // no additional cost. + if (!S->first->getType()->isVoidTy()) { + Type *Ty1 = S->first->getType(), + *Ty2 = S->second->getType(); + Type *VTy = getVecTypeForPair(Ty1, Ty2); + + bool NeedsExtraction = false; + for (Value::use_iterator I = S->first->use_begin(), + IE = S->first->use_end(); I != IE; ++I) { + if (isa<ShuffleVectorInst>(*I) || + isa<InsertElementInst>(*I) || + isa<ExtractElementInst>(*I)) + continue; + if (PrunedTreeInstrs.count(*I)) + continue; + NeedsExtraction = true; + break; + } + + if (NeedsExtraction) { + int ESContrib; + if (Ty1->isVectorTy()) + ESContrib = (int) getInstrCost(Instruction::ShuffleVector, + Ty1, VTy); + else + ESContrib = (int) VTTI->getVectorInstrCost( + Instruction::ExtractElement, VTy, 0); + + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << + *S->first << "} = " << ESContrib << "\n"); + EffSize -= ESContrib; + } + + NeedsExtraction = false; + for (Value::use_iterator I = S->second->use_begin(), + IE = S->second->use_end(); I != IE; ++I) { + if (isa<ShuffleVectorInst>(*I) || + isa<InsertElementInst>(*I) || + isa<ExtractElementInst>(*I)) + continue; + if (PrunedTreeInstrs.count(*I)) + continue; + NeedsExtraction = true; + break; + } + + if (NeedsExtraction) { + int ESContrib; + if (Ty2->isVectorTy()) + ESContrib = (int) getInstrCost(Instruction::ShuffleVector, + Ty2, VTy); + else + ESContrib = (int) VTTI->getVectorInstrCost( + Instruction::ExtractElement, VTy, 1); + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << + *S->second << "} = " << ESContrib << "\n"); + EffSize -= ESContrib; + } + } + + // Compute the cost of incoming edges. + if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) { + Instruction *S1 = cast<Instruction>(S->first), + *S2 = cast<Instruction>(S->second); + for (unsigned o = 0; o < S1->getNumOperands(); ++o) { + Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o); + + // Combining constants into vector constants (or small vector + // constants into larger ones are assumed free). + if (isa<Constant>(O1) && isa<Constant>(O2)) + continue; + + if (FlipOrder) + std::swap(O1, O2); + + ValuePair VP = ValuePair(O1, O2); + ValuePair VPR = ValuePair(O2, O1); + + // Internal edges are not handled here. 
+ if (PrunedTree.count(VP) || PrunedTree.count(VPR)) + continue; + + Type *Ty1 = O1->getType(), + *Ty2 = O2->getType(); + Type *VTy = getVecTypeForPair(Ty1, Ty2); + + // Combining vector operations of the same type is also assumed + // folded with other operations. + if (Ty1 == Ty2 && + (isa<ShuffleVectorInst>(O1) || + isa<InsertElementInst>(O1) || + isa<InsertElementInst>(O1)) && + (isa<ShuffleVectorInst>(O2) || + isa<InsertElementInst>(O2) || + isa<InsertElementInst>(O2))) + continue; + + int ESContrib; + // This pair has already been formed. + if (IncomingPairs.count(VP)) { + continue; + } else if (IncomingPairs.count(VPR)) { + ESContrib = (int) getInstrCost(Instruction::ShuffleVector, + VTy, VTy); + } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { + ESContrib = (int) VTTI->getVectorInstrCost( + Instruction::InsertElement, VTy, 0); + ESContrib += (int) VTTI->getVectorInstrCost( + Instruction::InsertElement, VTy, 1); + } else if (!Ty1->isVectorTy()) { + // O1 needs to be inserted into a vector of size O2, and then + // both need to be shuffled together. + ESContrib = (int) VTTI->getVectorInstrCost( + Instruction::InsertElement, Ty2, 0); + ESContrib += (int) getInstrCost(Instruction::ShuffleVector, + VTy, Ty2); + } else if (!Ty2->isVectorTy()) { + // O2 needs to be inserted into a vector of size O1, and then + // both need to be shuffled together. + ESContrib = (int) VTTI->getVectorInstrCost( + Instruction::InsertElement, Ty1, 0); + ESContrib += (int) getInstrCost(Instruction::ShuffleVector, + VTy, Ty1); + } else { + Type *TyBig = Ty1, *TySmall = Ty2; + if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements()) + std::swap(TyBig, TySmall); + + ESContrib = (int) getInstrCost(Instruction::ShuffleVector, + VTy, TyBig); + if (TyBig != TySmall) + ESContrib += (int) getInstrCost(Instruction::ShuffleVector, + TyBig, TySmall); + } + + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" + << *O1 << " <-> " << *O2 << "} = " << + ESContrib << "\n"); + EffSize -= ESContrib; + IncomingPairs.insert(VP); + } + } + } + } else { + for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), + E = PrunedTree.end(); S != E; ++S) + EffSize += (int) getDepthFactor(S->first); + } DEBUG(if (DebugPairSelection) dbgs() << "BBV: found pruned Tree for pair {" << *J->first << " <-> " << *J->second << "} of depth " << MaxDepth << " and size " << PrunedTree.size() << " (effective size: " << EffSize << ")\n"); - if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { + if (((VTTI && !UseChainDepthWithTI) || + MaxDepth >= Config.ReqChainDepth) && + EffSize > 0 && EffSize > BestEffSize) { BestMaxDepth = MaxDepth; BestEffSize = EffSize; BestTree = PrunedTree; @@ -1431,8 +1919,12 @@ namespace { // that will be fused into vector instructions. 
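The incoming-edge accounting above reduces to a small case analysis on the two operand types: two scalars need two insertelements, a scalar plus a vector needs an insert and a shuffle, and two vectors need one shuffle (two when their widths differ). A rough stand-alone model, with made-up unit costs standing in for the VTTI queries the pass actually makes:

#include <cassert>

// Illustrative unit costs; the pass gets the real numbers from the target
// via VectorTargetTransformInfo (VTTI) per opcode and type.
static const int InsertEltCost = 1;
static const int ShuffleCost   = 1;

// NumElts1/NumElts2 are the vector widths of the two incoming operands
// (0 meaning the operand is scalar).
int incomingEdgeCost(int NumElts1, int NumElts2) {
  if (NumElts1 == 0 && NumElts2 == 0)
    return 2 * InsertEltCost;           // insert both scalars
  if (NumElts1 == 0 || NumElts2 == 0)
    return InsertEltCost + ShuffleCost; // insert the scalar, then shuffle
  int Cost = ShuffleCost;               // shuffle the two vectors together
  if (NumElts1 != NumElts2)
    Cost += ShuffleCost;                // widen the smaller vector first
  return Cost;
}

int main() {
  assert(incomingEdgeCost(0, 0) == 2);  // two insertelements
  assert(incomingEdgeCost(0, 4) == 2);  // insert + shuffle
  assert(incomingEdgeCost(4, 2) == 2);  // widen + shuffle
  return 0;
}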
void BBVectorize::choosePairs( std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, DenseSet<ValuePair> &PairableInstUsers, DenseMap<Value *, Value *>& ChosenPairs) { bool UseCycleCheck = @@ -1447,9 +1939,12 @@ namespace { VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); // The best pair to choose and its tree: - size_t BestMaxDepth = 0, BestEffSize = 0; + size_t BestMaxDepth = 0; + int BestEffSize = 0; DenseSet<ValuePair> BestTree; - findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, + findBestTreeFor(CandidatePairs, CandidatePairCostSavings, + PairableInsts, FixedOrderPairs, PairConnectionTypes, + ConnectedPairs, ConnectedPairDeps, PairableInstUsers, PairableInstUserMap, ChosenPairs, BestTree, BestMaxDepth, BestEffSize, ChoiceRange, UseCycleCheck); @@ -1502,24 +1997,19 @@ namespace { // Returns the value that is to be used as the pointer input to the vector // instruction that fuses I with J. Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, - Instruction *I, Instruction *J, unsigned o, - bool FlipMemInputs) { + Instruction *I, Instruction *J, unsigned o) { Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment; + unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; int64_t OffsetInElmts; - // Note: the analysis might fail here, that is why FlipMemInputs has + // Note: the analysis might fail here, that is why the pair order has // been precomputed (OffsetInElmts must be unused here). (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - OffsetInElmts); + IAddressSpace, JAddressSpace, + OffsetInElmts, false); // The pointer value is taken to be the one with the lowest offset. - Value *VPtr; - if (!FlipMemInputs) { - VPtr = IPtr; - } else { - VPtr = JPtr; - } + Value *VPtr = IPtr; Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType(); Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType(); @@ -1527,7 +2017,7 @@ namespace { Type *VArgPtrType = PointerType::get(VArgType, cast<PointerType>(IPtr->getType())->getAddressSpace()); return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), - /* insert before */ FlipMemInputs ? J : I); + /* insert before */ I); } void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, @@ -1597,7 +2087,7 @@ namespace { Instruction *J, unsigned o, Value *&LOp, unsigned numElemL, Type *ArgTypeL, Type *ArgTypeH, - unsigned IdxOff) { + bool IBeforeJ, unsigned IdxOff) { bool ExpandedIEChain = false; if (InsertElementInst *LIE = dyn_cast<InsertElementInst>(LOp)) { // If we have a pure insertelement chain, then this can be rewritten @@ -1631,8 +2121,9 @@ namespace { LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], ConstantInt::get(Type::getInt32Ty(Context), i + IdxOff), - getReplacementName(I, true, o, i+1)); - LIENext->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, + true, o, i+1)); + LIENext->insertBefore(IBeforeJ ? J : I); LIEPrev = LIENext; } @@ -1647,7 +2138,7 @@ namespace { // Returns the value to be used as the specified operand of the vector // instruction that fuses I with J. 
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool FlipMemInputs) { + Instruction *J, unsigned o, bool IBeforeJ) { Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); @@ -1658,12 +2149,6 @@ namespace { Instruction *L = I, *H = J; Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; - if (FlipMemInputs) { - L = J; - H = I; - ArgTypeL = ArgTypeJ; - ArgTypeH = ArgTypeI; - } unsigned numElemL; if (ArgTypeL->isVectorTy()) @@ -1816,8 +2301,9 @@ namespace { Instruction *S = new ShuffleVectorInst(I1, UndefValue::get(I1T), ConstantVector::get(Mask), - getReplacementName(I, true, o)); - S->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, + true, o)); + S->insertBefore(IBeforeJ ? J : I); return S; } @@ -1838,8 +2324,9 @@ namespace { Instruction *NewI1 = new ShuffleVectorInst(I1, UndefValue::get(I1T), ConstantVector::get(Mask), - getReplacementName(I, true, o, 1)); - NewI1->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); + NewI1->insertBefore(IBeforeJ ? J : I); I1 = NewI1; I1T = I2T; I1Elem = I2Elem; @@ -1854,8 +2341,9 @@ namespace { Instruction *NewI2 = new ShuffleVectorInst(I2, UndefValue::get(I2T), ConstantVector::get(Mask), - getReplacementName(I, true, o, 1)); - NewI2->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); + NewI2->insertBefore(IBeforeJ ? J : I); I2 = NewI2; I2T = I1T; I2Elem = I1Elem; @@ -1875,8 +2363,8 @@ namespace { Instruction *NewOp = new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), - getReplacementName(I, true, o)); - NewOp->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, true, o)); + NewOp->insertBefore(IBeforeJ ? J : I); return NewOp; } } @@ -1884,17 +2372,17 @@ namespace { Type *ArgType = ArgTypeL; if (numElemL < numElemH) { if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, - ArgTypeL, VArgType, 1)) { + ArgTypeL, VArgType, IBeforeJ, 1)) { // This is another short-circuit case: we're combining a scalar into // a vector that is formed by an IE chain. We've just expanded the IE // chain, now insert the scalar and we're done. Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, - getReplacementName(I, true, o)); - S->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, true, o)); + S->insertBefore(IBeforeJ ? J : I); return S; } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, - ArgTypeH)) { + ArgTypeH, IBeforeJ)) { // The two vector inputs to the shuffle must be the same length, // so extend the smaller vector to be the same length as the larger one. Instruction *NLOp; @@ -1909,29 +2397,32 @@ namespace { NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), ConstantVector::get(Mask), - getReplacementName(I, true, o, 1)); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); } else { NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, - getReplacementName(I, true, o, 1)); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); } - NLOp->insertBefore(J); + NLOp->insertBefore(IBeforeJ ? J : I); LOp = NLOp; } ArgType = ArgTypeH; } else if (numElemL > numElemH) { if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, - ArgTypeH, VArgType)) { + ArgTypeH, VArgType, IBeforeJ)) { Instruction *S = InsertElementInst::Create(LOp, HOp, ConstantInt::get(Type::getInt32Ty(Context), numElemL), - getReplacementName(I, true, o)); - S->insertBefore(J); + getReplacementName(IBeforeJ ? 
I : J, + true, o)); + S->insertBefore(IBeforeJ ? J : I); return S; } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, - ArgTypeL)) { + ArgTypeL, IBeforeJ)) { Instruction *NHOp; if (numElemH > 1) { std::vector<Constant *> Mask(numElemL); @@ -1943,13 +2434,15 @@ namespace { NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), ConstantVector::get(Mask), - getReplacementName(I, true, o, 1)); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); } else { NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, - getReplacementName(I, true, o, 1)); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); } - NHOp->insertBefore(J); + NHOp->insertBefore(IBeforeJ ? J : I); HOp = NHOp; } } @@ -1967,19 +2460,21 @@ namespace { } Instruction *BV = new ShuffleVectorInst(LOp, HOp, - ConstantVector::get(Mask), - getReplacementName(I, true, o)); - BV->insertBefore(J); + ConstantVector::get(Mask), + getReplacementName(IBeforeJ ? I : J, true, o)); + BV->insertBefore(IBeforeJ ? J : I); return BV; } Instruction *BV1 = InsertElementInst::Create( UndefValue::get(VArgType), LOp, CV0, - getReplacementName(I, true, o, 1)); - BV1->insertBefore(I); + getReplacementName(IBeforeJ ? I : J, + true, o, 1)); + BV1->insertBefore(IBeforeJ ? J : I); Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, - getReplacementName(I, true, o, 2)); - BV2->insertBefore(J); + getReplacementName(IBeforeJ ? I : J, + true, o, 2)); + BV2->insertBefore(IBeforeJ ? J : I); return BV2; } @@ -1988,7 +2483,7 @@ namespace { void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector<Value *, 3> &ReplacedOperands, - bool FlipMemInputs) { + bool IBeforeJ) { unsigned NumOperands = I->getNumOperands(); for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { @@ -1997,8 +2492,7 @@ namespace { if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) { // This is the pointer for a load/store instruction. - ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o, - FlipMemInputs); + ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o); continue; } else if (isa<CallInst>(I)) { Function *F = cast<CallInst>(I)->getCalledFunction(); @@ -2026,8 +2520,7 @@ namespace { continue; } - ReplacedOperands[o] = - getReplacementInput(Context, I, J, o, FlipMemInputs); + ReplacedOperands[o] = getReplacementInput(Context, I, J, o, IBeforeJ); } } @@ -2038,8 +2531,7 @@ namespace { void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I, Instruction *J, Instruction *K, Instruction *&InsertionPt, - Instruction *&K1, Instruction *&K2, - bool FlipMemInputs) { + Instruction *&K1, Instruction *&K2) { if (isa<StoreInst>(I)) { AA->replaceWithNewValue(I, K); AA->replaceWithNewValue(J, K); @@ -2069,13 +2561,11 @@ namespace { } K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? Mask2 : Mask1), + ConstantVector::get( Mask1), getReplacementName(K, false, 1)); } else { Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); - K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0, + K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1)); } @@ -2087,13 +2577,11 @@ namespace { } K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? 
Mask1 : Mask2), + ConstantVector::get( Mask2), getReplacementName(K, false, 2)); } else { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); - K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1, + K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2)); } @@ -2193,36 +2681,6 @@ namespace { } } - // As with the aliasing information, SCEV can also change because of - // vectorization. This information is used to compute relative pointer - // offsets; the necessary information will be cached here prior to - // fusion. - void BBVectorize::collectPtrInfo(std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<Value *> &LowPtrInsts) { - for (std::vector<Value *>::iterator PI = PairableInsts.begin(), - PIE = PairableInsts.end(); PI != PIE; ++PI) { - DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); - if (P == ChosenPairs.end()) continue; - - Instruction *I = cast<Instruction>(P->first); - Instruction *J = cast<Instruction>(P->second); - - if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) - continue; - - Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment; - int64_t OffsetInElmts; - if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - OffsetInElmts) || abs64(OffsetInElmts) != 1) - llvm_unreachable("Pre-fusion pointer analysis failed"); - - Value *LowPI = (OffsetInElmts > 0) ? I : J; - LowPtrInsts.insert(LowPI); - } - } - // When the first instruction in each pair is cloned, it will inherit its // parent's metadata. This metadata must be combined with that of the other // instruction in a safe way. @@ -2256,27 +2714,27 @@ namespace { // second member). void BBVectorize::fuseChosenPairs(BasicBlock &BB, std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *> &ChosenPairs) { + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + std::multimap<ValuePair, ValuePair> &ConnectedPairs, + std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) { LLVMContext& Context = BB.getContext(); // During the vectorization process, the order of the pairs to be fused // could be flipped. So we'll add each pair, flipped, into the ChosenPairs // list. After a pair is fused, the flipped pair is removed from the list. - std::vector<ValuePair> FlippedPairs; - FlippedPairs.reserve(ChosenPairs.size()); + DenseSet<ValuePair> FlippedPairs; for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(), E = ChosenPairs.end(); P != E; ++P) - FlippedPairs.push_back(ValuePair(P->second, P->first)); - for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(), + FlippedPairs.insert(ValuePair(P->second, P->first)); + for (DenseSet<ValuePair>::iterator P = FlippedPairs.begin(), E = FlippedPairs.end(); P != E; ++P) ChosenPairs.insert(*P); std::multimap<Value *, Value *> LoadMoveSet; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); - DenseSet<Value *> LowPtrInsts; - collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); - DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { @@ -2316,44 +2774,91 @@ namespace { continue; } - bool FlipMemInputs = false; - if (isa<LoadInst>(I) || isa<StoreInst>(I)) - FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); + // If the pair must have the other order, then flip it. 
+ bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); + if (!FlipPairOrder && !FixedOrderPairs.count(ValuePair(I, J))) { + // This pair does not have a fixed order, and so we might want to + // flip it if that will yield fewer shuffles. We count the number + // of dependencies connected via swaps, and those directly connected, + // and flip the order if the number of swaps is greater. + bool OrigOrder = true; + VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J)); + if (IP.first == ConnectedPairDeps.end()) { + IP = ConnectedPairDeps.equal_range(ValuePair(J, I)); + OrigOrder = false; + } + + if (IP.first != ConnectedPairDeps.end()) { + unsigned NumDepsDirect = 0, NumDepsSwap = 0; + for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; + Q != IP.second; ++Q) { + DenseMap<VPPair, unsigned>::iterator R = + PairConnectionTypes.find(VPPair(Q->second, Q->first)); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + if (R->second == PairConnectionDirect) + ++NumDepsDirect; + else if (R->second == PairConnectionSwap) + ++NumDepsSwap; + } + + if (!OrigOrder) + std::swap(NumDepsDirect, NumDepsSwap); + if (NumDepsSwap > NumDepsDirect) { + FlipPairOrder = true; + DEBUG(dbgs() << "BBV: reordering pair: " << *I << + " <-> " << *J << "\n"); + } + } + } + + Instruction *L = I, *H = J; + if (FlipPairOrder) + std::swap(H, L); + + // If the pair being fused uses the opposite order from that in the pair + // connection map, then we need to flip the types. + VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L)); + for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; + Q != IP.second; ++Q) { + DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + if (R->second == PairConnectionDirect) + R->second = PairConnectionSwap; + else if (R->second == PairConnectionSwap) + R->second = PairConnectionDirect; + } + + bool LBeforeH = !FlipPairOrder; unsigned NumOperands = I->getNumOperands(); SmallVector<Value *, 3> ReplacedOperands(NumOperands); - getReplacementInputsForPair(Context, I, J, ReplacedOperands, - FlipMemInputs); + getReplacementInputsForPair(Context, L, H, ReplacedOperands, + LBeforeH); // Make a copy of the original operation, change its type to the vector // type and replace its operands with the vector operands. - Instruction *K = I->clone(); - if (I->hasName()) K->takeName(I); + Instruction *K = L->clone(); + if (L->hasName()) + K->takeName(L); + else if (H->hasName()) + K->takeName(H); if (!isa<StoreInst>(K)) - K->mutateType(getVecTypeForPair(I->getType(), J->getType())); + K->mutateType(getVecTypeForPair(L->getType(), H->getType())); - combineMetadata(K, J); + combineMetadata(K, H); for (unsigned o = 0; o < NumOperands; ++o) K->setOperand(o, ReplacedOperands[o]); - // If we've flipped the memory inputs, make sure that we take the correct - // alignment. - if (FlipMemInputs) { - if (isa<StoreInst>(K)) - cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment()); - else - cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment()); - } - K->insertAfter(J); // Instruction insertion point: Instruction *InsertionPt = K; Instruction *K1 = 0, *K2 = 0; - replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2, - FlipMemInputs); + replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); // The use tree of the first original instruction must be moved to after // the location of the second instruction. 
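The flip decision in this hunk is a majority vote over the dependency kinds: swap-connected dependencies are cheaper when the pair is fused in flipped order, and direct ones when it is kept as-is. A stand-alone sketch of the vote (illustrative; the real code also honors FixedOrderPairs, which this model omits):

#include <cassert>
#include <vector>

enum PairConnectionType { PairConnectionDirect, PairConnectionSwap,
                          PairConnectionSplat };

// Each Swap dependency costs a swizzle when the pair is fused in its
// original order and nothing when it is flipped; vice versa for Direct.
bool shouldFlipPairOrder(const std::vector<PairConnectionType> &Deps) {
  unsigned NumDepsDirect = 0, NumDepsSwap = 0;
  for (size_t i = 0; i < Deps.size(); ++i) {
    if (Deps[i] == PairConnectionDirect)
      ++NumDepsDirect;
    else if (Deps[i] == PairConnectionSwap)
      ++NumDepsSwap;
  }
  return NumDepsSwap > NumDepsDirect;
}

int main() {
  std::vector<PairConnectionType> Deps;
  Deps.push_back(PairConnectionSwap);
  Deps.push_back(PairConnectionSwap);
  Deps.push_back(PairConnectionDirect);
  assert(shouldFlipPairOrder(Deps)); // 2 swaps vs. 1 direct: flip
  return 0;
}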
The entire use tree of the @@ -2363,10 +2868,10 @@ namespace { moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); if (!isa<StoreInst>(I)) { - I->replaceAllUsesWith(K1); - J->replaceAllUsesWith(K2); - AA->replaceWithNewValue(I, K1); - AA->replaceWithNewValue(J, K2); + L->replaceAllUsesWith(K1); + H->replaceAllUsesWith(K2); + AA->replaceWithNewValue(L, K1); + AA->replaceWithNewValue(H, K2); } // Instructions that may read from memory may be in the load move set. @@ -2399,6 +2904,9 @@ namespace { SE->forgetValue(J); I->eraseFromParent(); J->eraseFromParent(); + + DEBUG(if (PrintAfterEveryPair) dbgs() << "BBV: block is now: \n" << + BB << "\n"); } DEBUG(dbgs() << "BBV: final: \n" << BB << "\n"); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 423c7a4911..892808760f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" @@ -74,6 +75,9 @@ static cl::opt<unsigned> VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Set the default vectorization width. Zero is autoselect.")); +/// We don't vectorize loops with a known constant trip count below this number. +const unsigned TinyTripCountThreshold = 16; + namespace { // Forward declarations. @@ -98,8 +102,9 @@ class SingleBlockLoopVectorizer { public: /// Ctor. SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li, - LPPassManager *Lpm, unsigned VecWidth): - OrigLoop(Orig), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth), + DominatorTree *dt, LPPassManager *Lpm, + unsigned VecWidth): + OrigLoop(Orig), SE(Se), LI(Li), DT(dt), LPM(Lpm), VF(VecWidth), Builder(Se->getContext()), Induction(0), OldInduction(0) { } // Perform the actual loop widening (vectorization). @@ -108,9 +113,9 @@ public: createEmptyLoop(Legal); /// Widen each instruction in the old loop to a new one in the new loop. /// Use the Legality module to find the induction and reduction variables. - vectorizeLoop(Legal); + vectorizeLoop(Legal); // register the new loop. - cleanup(); + updateAnalysis(); } private: @@ -119,7 +124,7 @@ private: /// Copy and widen the instructions from the old loop. void vectorizeLoop(LoopVectorizationLegality *Legal); /// Insert the new loop to the loop hierarchy and pass manager. - void cleanup(); + void updateAnalysis(); /// This instruction is un-vectorizable. Implement it as a sequence /// of scalars. @@ -155,6 +160,8 @@ private: ScalarEvolution *SE; // Loop Info. LoopInfo *LI; + // Dominator Tree. + DominatorTree *DT; // Loop Pass Manager; LPPassManager *LPM; // The vectorization factor to use. @@ -165,6 +172,10 @@ private: // --- Vectorization state --- + /// The vector-loop preheader. + BasicBlock *LoopVectorPreHeader; + /// The scalar-loop preheader. + BasicBlock *LoopScalarPreHeader; /// Middle Block between the vector and the scalar. BasicBlock *LoopMiddleBlock; ///The ExitBlock of the scalar loop. @@ -203,15 +214,13 @@ public: TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { } /// This represents the kinds of reductions that we support. - /// We use the enum values to hold the 'identity' value for - /// each operand. This value does not change the result if applied. 
enum ReductionKind { - NoReduction = -1, /// Not a reduction. - IntegerAdd = 0, /// Sum of numbers. - IntegerMult = 1, /// Product of numbers. - IntegerOr = 2, /// Bitwise or logical OR of numbers. - IntegerAnd = 3, /// Bitwise or logical AND of numbers. - IntegerXor = 4 /// Bitwise or logical XOR of numbers. + NoReduction, /// Not a reduction. + IntegerAdd, /// Sum of numbers. + IntegerMult, /// Product of numbers. + IntegerOr, /// Bitwise or logical OR of numbers. + IntegerAnd, /// Bitwise or logical AND of numbers. + IntegerXor /// Bitwise or logical XOR of numbers. }; /// This POD struct holds information about reduction variables. @@ -254,6 +263,9 @@ public: /// This check allows us to vectorize A[idx] into a wide load/store. bool isConsecutiveGep(Value *Ptr); + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);} + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -291,6 +303,9 @@ private: /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. SmallPtrSet<Value*, 4> AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet<Instruction*, 4> Uniforms; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -311,7 +326,7 @@ public: /// Returns the most profitable vectorization factor for the loop that is /// smaller or equal to the VF argument. This method checks every power /// of two up to VF. - unsigned findBestVectorizationFactor(unsigned VF = 4); + unsigned findBestVectorizationFactor(unsigned VF = 8); private: /// Returns the expected execution cost. The unit of the cost does @@ -324,6 +339,11 @@ private: /// width. Vector width of one means scalar. unsigned getInstructionCost(Instruction *I, unsigned VF); + /// A helper function for converting Scalar types to vector types. + /// If the incoming type is void, we return void. If the VF is 1, we return + /// the scalar type. + static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -346,6 +366,7 @@ struct LoopVectorize : public LoopPass { DataLayout *DL; LoopInfo *LI; TargetTransformInfo *TTI; + DominatorTree *DT; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -356,6 +377,7 @@ struct LoopVectorize : public LoopPass { DL = getAnalysisIfAvailable<DataLayout>(); LI = &getAnalysis<LoopInfo>(); TTI = getAnalysisIfAvailable<TargetTransformInfo>(); + DT = &getAnalysis<DominatorTree>(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); @@ -387,10 +409,12 @@ struct LoopVectorize : public LoopPass { VF = VectorizationFactor; } - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ").\n"); + DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<< + L->getHeader()->getParent()->getParent()->getModuleIdentifier()<< + "\n"); // If we decided that it is *legal* to vectorizer the loop then do it. 
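findBestVectorizationFactor (its default cap is raised from 4 to 8 in this patch) tries every power of two up to the cap and keeps the most profitable width. A stand-alone model of that search; the cost-per-lane scoring used here is one plausible choice for illustration, not necessarily the pass's exact formula:

// Try VF = 1, 2, 4, ..., MaxVF and keep the width with the lowest cost per
// lane. CostFn stands in for LoopVectorizationCostModel::expectedCost.
unsigned findBestVF(unsigned MaxVF, unsigned (*CostFn)(unsigned)) {
  unsigned BestVF = 1;
  float BestPerLane = (float)CostFn(1);
  for (unsigned VF = 2; VF <= MaxVF; VF *= 2) {
    float PerLane = (float)CostFn(VF) / (float)VF; // amortize over lanes
    if (PerLane < BestPerLane) {
      BestPerLane = PerLane;
      BestVF = VF;
    }
  }
  return BestVF;
}

// Stub cost: grows slower than the width, so wider wins up to the cap.
static unsigned stubCost(unsigned VF) { return 10 + 2 * VF; }

int main() { return findBestVF(8, stubCost) == 8 ? 0 : 1; }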
- SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF); + SingleBlockLoopVectorizer LB(L, SE, LI, DT, &LPM, VF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -403,6 +427,9 @@ struct LoopVectorize : public LoopPass { AU.addRequiredID(LCSSAID); AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); + AU.addRequired<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + AU.addPreserved<DominatorTree>(); } }; @@ -497,7 +524,7 @@ SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { SmallVector<Constant*, 8> Indices; // Create a vector of consecutive numbers from zero to VF. for (unsigned i = 0; i < VF; ++i) - Indices.push_back(ConstantInt::get(ScalarTy, Val)); + Indices.push_back(ConstantInt::get(ScalarTy, Val, true)); // Add the consecutive indices to the vector value. return ConstantVector::get(Indices); @@ -573,7 +600,8 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) { WidenMap[Instr] = VecResults; } -void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { +void +SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the @@ -608,6 +636,10 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); + // The loop index does not have to start at Zero. It starts with this value. + OldInduction = Legal->getInduction(); + Value *StartIdx = OldInduction->getIncomingValueForBlock(BypassBlock); + assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop"); assert(BypassBlock && "Invalid loop structure"); @@ -623,7 +655,6 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal "scalar.preheader"); // Find the induction variable. BasicBlock *OldBasicBlock = OrigLoop->getHeader(); - OldInduction = Legal->getInduction(); assert(OldInduction && "We must have a single phi node."); Type *IdxTy = OldInduction->getType(); @@ -633,7 +664,6 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal // Generate the induction variable. Induction = Builder.CreatePHI(IdxTy, 2, "index"); - Constant *Zero = ConstantInt::get(IdxTy, 0); Constant *Step = ConstantInt::get(IdxTy, VF); // Find the loop boundaries. @@ -657,15 +687,22 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc); + + // Add the start index to the loop count to get the new end index. + Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc); + // Now we need to generate the expression for N - (N % VF), which is // the part that the vectorized body will execute. Constant *CIVF = ConstantInt::get(IdxTy, VF); Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc); Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc); + Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx, + "end.idx.rnd.down", Loc); // Now, compare the new count to zero. If it is zero, jump to the scalar part. 
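The index bookkeeping in this hunk is easiest to check with concrete numbers; the following worked example (not code from the patch) mirrors the end.idx / end.idx.rnd.down computations:

#include <cassert>

int main() {
  unsigned StartIdx = 3, Count = 21, VF = 4;
  unsigned IdxEnd = StartIdx + Count;                   // end.idx = 24
  unsigned R = Count % VF;                              // n.mod.vf = 1
  unsigned CountRoundDown = Count - R;                  // n.vec = 20
  unsigned IdxEndRoundDown = StartIdx + CountRoundDown; // end.idx.rnd.down = 23
  // The vector body counts from StartIdx up to IdxEndRoundDown in steps of
  // VF; the scalar loop then finishes the iterations up to IdxEnd. If
  // IdxEndRoundDown == StartIdx, the "cmp.zero" branch skips the vector
  // body entirely.
  assert(IdxEnd == 24 && IdxEndRoundDown == 23);
  return 0;
}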
Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - CountRoundDown, ConstantInt::getNullValue(IdxTy), + IdxEndRoundDown, + StartIdx, "cmp.zero", Loc); BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); // Remove the old terminator. @@ -674,8 +711,8 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. - Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, - CountRoundDown, "cmp.n", + Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd, + IdxEndRoundDown, "cmp.n", MiddleBlock->getTerminator()); BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator()); @@ -684,10 +721,10 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal // Create i+1 and fill the PHINode. Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next"); - Induction->addIncoming(Zero, VectorPH); + Induction->addIncoming(StartIdx, VectorPH); Induction->addIncoming(NextIdx, VecBody); // Create the compare. - Value *ICmp = Builder.CreateICmpEQ(NextIdx, CountRoundDown); + Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown); Builder.CreateCondBr(ICmp, MiddleBlock, VecBody); // Now we have two terminators. Remove the old one from the block. @@ -695,7 +732,7 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal // Fix the scalar body iteration count. unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH); - OldInduction->setIncomingValue(BlockIdx, CountRoundDown); + OldInduction->setIncomingValue(BlockIdx, IdxEndRoundDown); // Get ready to start creating new instructions into the vectorized body. Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); @@ -714,6 +751,8 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal } // Save the state. + LoopVectorPreHeader = VectorPH; + LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock; LoopVectorBody = VecBody; @@ -721,6 +760,27 @@ void SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal LoopBypassBlock = BypassBlock; } +/// This function returns the identity element (or neutral element) for +/// the operation K. +static unsigned +getReductionIdentity(LoopVectorizationLegality::ReductionKind K) { + switch (K) { + case LoopVectorizationLegality::IntegerXor: + case LoopVectorizationLegality::IntegerAdd: + case LoopVectorizationLegality::IntegerOr: + // Adding, Xoring, Oring zero to a number does not change it. + return 0; + case LoopVectorizationLegality::IntegerMult: + // Multiplying a number by 1 does not change it. + return 1; + case LoopVectorizationLegality::IntegerAnd: + // AND-ing a number with an all-1 value does not change it. + return -1; + default: + llvm_unreachable("Unknown reduction kind"); + } +} + void SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { //===------------------------------------------------===// @@ -789,8 +849,19 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); Value *A = getVectorValue(Inst->getOperand(0)); Value *B = getVectorValue(Inst->getOperand(1)); + // Use this vector value for all users of the original instruction. 
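The identities returned by getReductionIdentity above are exactly the values satisfying x op identity == x, which is what makes it safe to seed the unused vector lanes with them. A quick stand-alone check:

#include <cassert>
#include <stdint.h>

int main() {
  uint32_t x = 0xDEADBEEF;
  assert((x + 0u)  == x);  // IntegerAdd:  identity 0
  assert((x | 0u)  == x);  // IntegerOr:   identity 0
  assert((x ^ 0u)  == x);  // IntegerXor:  identity 0
  assert((x * 1u)  == x);  // IntegerMult: identity 1
  assert((x & ~0u) == x);  // IntegerAnd:  identity -1 (all ones)
  return 0;
}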
- WidenMap[Inst] = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B); + WidenMap[Inst] = V; + + // Update the NSW, NUW and Exact flags. + BinaryOperator *VecOp = cast<BinaryOperator>(V); + if (isa<OverflowingBinaryOperator>(BinOp)) { + VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); + VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); + } + if (isa<PossiblyExactOperator>(VecOp)) + VecOp->setIsExact(BinOp->isExact()); break; } case Instruction::Select: { @@ -844,8 +915,8 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0)); + Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); @@ -874,7 +945,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1)); - LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0)); + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); @@ -945,10 +1016,9 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr); Type *VecTy = VectorExit->getType(); - // Find the reduction identity variable. The value of the enum is the - // identity. Zero for addition. One for Multiplication. - unsigned IdentitySclr = RdxDesc.Kind; - Constant *Identity = getUniformVector(IdentitySclr, + // Find the reduction identity variable. Zero for addition, or, xor, + // one for multiplication, -1 for And. + Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind), VecTy->getScalarType()); // This vector is the Identity vector where the first element is the @@ -1040,9 +1110,22 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { }// end of for each redux variable. } -void SingleBlockLoopVectorizer::cleanup() { +void SingleBlockLoopVectorizer::updateAnalysis() { // The original basic block. SE->forgetLoop(OrigLoop); + + // Update the dominator tree information. + assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) && + "Entry does not dominate exit."); + + DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock); + DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader); + DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock); + DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock); + DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); + DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); + + DEBUG(DT->verifyAnalysis()); } bool LoopVectorizationLegality::canVectorize() { @@ -1076,6 +1159,14 @@ bool LoopVectorizationLegality::canVectorize() { return false; } + // Do not loop-vectorize loops with a tiny trip count. 
+ unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB); + if (TC > 0u && TC < TinyTripCountThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << + "This loop is not worth vectorizing.\n"); + return false; + } + DEBUG(dbgs() << "LV: We can vectorize this loop!\n"); // Okay! We can vectorize. At this point we don't have any other mem analysis @@ -1139,8 +1230,7 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { // We still don't handle functions. CallInst *CI = dyn_cast<CallInst>(I); if (CI) { - DEBUG(dbgs() << "LV: Found a call site:"<< - CI->getCalledFunction()->getName() << "\n"); + DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -1172,9 +1262,40 @@ bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) { return false; } - // If the memory dependencies do not prevent us from - // vectorizing, then vectorize. - return canVectorizeMemory(BB); + // Don't vectorize if the memory dependencies do not allow vectorization. + if (!canVectorizeMemory(BB)) + return false; + + // We now know that the loop is vectorizable! + // Collect variables that will remain uniform after vectorization. + std::vector<Value*> Worklist; + + // Start with the conditional branch and walk up the block. + Worklist.push_back(BB.getTerminator()->getOperand(0)); + + while (Worklist.size()) { + Instruction *I = dyn_cast<Instruction>(Worklist.back()); + Worklist.pop_back(); + // Look at instructions inside this block. + if (!I) continue; + if (I->getParent() != &BB) continue; + + // Stop when reaching PHI nodes. + if (isa<PHINode>(I)) { + assert(I == Induction && "Found a uniform PHI that is not the induction"); + break; + } + + // This is a known uniform. + Uniforms.insert(I); + + // Insert all operands. + for (int i=0, Op = I->getNumOperands(); i < Op; ++i) { + Worklist.push_back(I->getOperand(i)); + } + } + + return true; } bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { @@ -1262,6 +1383,13 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) { Reads.push_back(Ptr); } + // If we write (or read-write) to a single destination and there are no + // other reads in this loop then is it safe to vectorize. + if (ReadWrites.size() == 1 && Reads.size() == 0) { + DEBUG(dbgs() << "LV: Found a write-only loop!\n"); + return true; + } + // Now that the pointers are in two lists (Reads and ReadWrites), we // can check that there are no conflicts between each of the writes and // between the writes to the reads. @@ -1420,10 +1548,9 @@ bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { return false; } const SCEV *Step = AR->getStepRecurrence(*SE); - const SCEV *Start = AR->getStart(); - if (!Step->isOne() || !Start->isZero()) { - DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n"); + if (!Step->isOne()) { + DEBUG(dbgs() << "LV: PHI stride does not equal one.\n"); return false; } return true; @@ -1478,11 +1605,25 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { assert(VTTI && "Invalid vector target transformation info"); + + // If we know that this instruction will remain uniform, check the cost of + // the scalar version. + if (Legal->isUniformAfterVectorization(I)) + VF = 1; + + Type *RetTy = I->getType(); + Type *VectorTy = ToVectorTy(RetTy, VF); + + + // TODO: We need to estimate the cost of intrinsic calls. 
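The Uniforms set collected above is what later lets getInstructionCost price an instruction at VF = 1: values feeding only the loop control stay scalar after vectorization. A simplified model of the backward walk (the real code additionally asserts that the only PHI reached is the induction variable):

#include <set>
#include <vector>

// Start from the loop's branch condition and follow operands backwards
// within the block; everything reached before the induction PHI only feeds
// the loop control and remains uniform after vectorization.
struct Inst { bool IsPHI; std::vector<Inst*> Ops; bool InBlock; };

void collectUniforms(Inst *BranchCond, std::set<Inst*> &Uniforms) {
  std::vector<Inst*> Worklist(1, BranchCond);
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!I || !I->InBlock)
      continue;
    if (I->IsPHI)
      break;                 // reached the induction variable; stop
    Uniforms.insert(I);
    for (size_t i = 0; i < I->Ops.size(); ++i)
      Worklist.push_back(I->Ops[i]);
  }
}

int main() {
  Inst Phi = { true,  std::vector<Inst*>(),        true };
  Inst Add = { false, std::vector<Inst*>(1, &Phi), true };
  Inst Cmp = { false, std::vector<Inst*>(1, &Add), true };
  std::set<Inst*> Uniforms;
  collectUniforms(&Cmp, Uniforms);
  // Cmp and Add stay scalar; the PHI terminates the walk.
  return (Uniforms.count(&Cmp) && Uniforms.count(&Add)) ? 0 : 1;
}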
switch (I->getOpcode()) { case Instruction::GetElementPtr: + // We mark this instruction as zero-cost because scalar GEPs are usually + // lowered to the intruction addressing mode. At the moment we don't + // generate vector geps. return 0; case Instruction::Br: { - return VTTI->getInstrCost(I->getOpcode()); + return VTTI->getCFInstrCost(I->getOpcode()); } case Instruction::PHI: return 0; @@ -1504,74 +1645,76 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Type *VTy = VectorType::get(I->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VTy); + return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy); } case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); - Type *VTy = VectorType::get(I->getType(), VF); const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); if (ScalarCond) CondTy = VectorType::get(CondTy, VF); - return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); } case Instruction::ICmp: case Instruction::FCmp: { - Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), VTy); + Type *ValTy = I->getOperand(0)->getType(); + VectorTy = ToVectorTy(ValTy, VF); + return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy); } case Instruction::Store: { StoreInst *SI = cast<StoreInst>(I); - Type *VTy = VectorType::get(SI->getValueOperand()->getType(), VF); + Type *ValTy = SI->getValueOperand()->getType(); + VectorTy = ToVectorTy(ValTy, VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), SI->getPointerAddressSpace()); // Scalarized stores. if (!Legal->isConsecutiveGep(SI->getPointerOperand())) { unsigned Cost = 0; - if (VF != 1) { - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, - VTy); - // The cost of extracting from the value vector and pointer vector. - Cost += VF * (ExtCost * 2); - } + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + ValTy); + // The cost of extracting from the value vector. + Cost += VF * (ExtCost); // The cost of the scalar stores. Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), - VTy->getScalarType(), + ValTy->getScalarType(), SI->getAlignment(), SI->getPointerAddressSpace()); return Cost; } // Wide stores. - return VTTI->getMemoryOpCost(I->getOpcode(), VTy, SI->getAlignment(), + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); - Type *VTy = VectorType::get(I->getType(), VF); + + if (VF == 1) + return VTTI->getMemoryOpCost(I->getOpcode(), RetTy, + LI->getAlignment(), + LI->getPointerAddressSpace()); // Scalarized loads. if (!Legal->isConsecutiveGep(LI->getPointerOperand())) { unsigned Cost = 0; - if (VF != 1) { - unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); - unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy); - - // The cost of inserting the loaded value into the result vector, and - // extracting from a vector of pointers. - Cost += VF * (InCost + ExCost); - } + unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy); + // The cost of inserting the loaded value into the result vector. + Cost += VF * (InCost); // The cost of the scalar stores. 
- Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(), + Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), + RetTy->getScalarType(), LI->getAlignment(), LI->getPointerAddressSpace()); return Cost; } // Wide loads. - return VTTI->getMemoryOpCost(I->getOpcode(), VTy, LI->getAlignment(), + return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), LI->getPointerAddressSpace()); } case Instruction::ZExt: @@ -1586,35 +1729,40 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - Type *SrcTy = VectorType::get(I->getOperand(0)->getType(), VF); - Type *DstTy = VectorType::get(I->getType(), VF); - return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy); + Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } default: { // We are scalarizing the instruction. Return the cost of the scalar // instruction, plus the cost of insert and extract into vector // elements, times the vector width. unsigned Cost = 0; - Type *Ty = I->getType(); - if (!Ty->isVoidTy()) { - Type *VTy = VectorType::get(Ty, VF); - unsigned InsCost = VTTI->getInstrCost(Instruction::InsertElement, VTy); - unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy); - Cost += VF * (InsCost + ExtCost); - } + bool IsVoid = RetTy->isVoidTy(); - /// We don't have any information on the scalar instruction, but maybe - /// the target has. - /// TODO: This may be a target-specific intrinsic. - /// Need to add API for that. - Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty); + unsigned InsCost = (IsVoid ? 0 : + VTTI->getInstrCost(Instruction::InsertElement, + VectorTy)); + unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, + VectorTy); + + // The cost of inserting the results plus extracting each one of the + // operands. + Cost += VF * (InsCost + ExtCost * I->getNumOperands()); + + // The cost of executing VF copies of the scalar instruction. + Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy); return Cost; } }// end of switch. 
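The default case that ends this switch amounts to a closed-form scalarization estimate: extract every operand lane, run VF scalar copies of the operation, and insert every result lane. In stand-alone form, with the VTTI queries abstracted into parameters:

#include <cassert>

// Closed form of the default case: gather each operand out of its vector,
// run VF scalar copies, scatter the results back (no inserts for void).
unsigned scalarizationCost(unsigned VF, unsigned NumOperands, bool IsVoid,
                           unsigned InsCost, unsigned ExtCost,
                           unsigned ScalarOpCost) {
  unsigned Ins = IsVoid ? 0 : InsCost;      // void results need no inserts
  return VF * (Ins + ExtCost * NumOperands) // per-lane inserts and extracts
       + VF * ScalarOpCost;                 // VF scalar copies of the op
}

int main() {
  // VF = 4, two operands, unit costs: 4*(1 + 1*2) + 4*1 = 16.
  assert(scalarizationCost(4, 2, false, 1, 1, 1) == 16);
  return 0;
}

For example, at VF = 4 with two operands and unit costs, a non-void instruction is estimated at 16 units, which is why scalarized operations are rarely chosen over genuinely vectorizable ones.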
} +Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) { + if (Scalar->isVoidTy() || VF == 1) + return Scalar; + return VectorType::get(Scalar, VF); +} } // namespace diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 642d4fcffb..5a552c34e1 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -201,8 +201,8 @@ std::string Attributes::getAsString() const { Result += "nonlazybind "; if (hasAttribute(Attributes::AddressSafety)) Result += "address_safety "; - if (hasAttribute(Attributes::ForceSizeOpt)) - Result += "forcesizeopt "; + if (hasAttribute(Attributes::MinSize)) + Result += "minsize "; if (hasAttribute(Attributes::StackAlignment)) { Result += "alignstack("; Result += utostr(getStackAlignment()); @@ -326,7 +326,7 @@ uint64_t AttributesImpl::getAttrMask(uint64_t Val) { case Attributes::UWTable: return 1 << 30; case Attributes::NonLazyBind: return 1U << 31; case Attributes::AddressSafety: return 1ULL << 32; - case Attributes::ForceSizeOpt: return 1ULL << 33; + case Attributes::MinSize: return 1ULL << 33; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index ba807fcacc..06eab0e8f0 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LLVM_REQUIRES_RTTI 1) - add_llvm_library(LLVMCore AsmWriter.cpp Attributes.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index cc386a8d86..fe3edac42e 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -87,9 +87,13 @@ foldConstantCastPair( Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opc); + // Assume that pointers are never more than 64 bits wide. + IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext()); + // Let CastInst::isEliminableCastPair do the heavy lifting. return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, - Type::getInt64Ty(DstTy->getContext())); + FakeIntPtrTy, FakeIntPtrTy, + FakeIntPtrTy); } static Constant *FoldBitCast(Constant *V, Type *DestTy) { @@ -514,10 +518,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, return UndefValue::get(DestTy); } - // No compile-time operations on this type yet. - if (V->getType()->isPPC_FP128Ty() || DestTy->isPPC_FP128Ty()) - return 0; - if (V->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); @@ -576,6 +576,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, DestTy->isDoubleTy() ? APFloat::IEEEdouble : DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended : DestTy->isFP128Ty() ? APFloat::IEEEquad : + DestTy->isPPC_FP128Ty() ? 
APFloat::PPCDoubleDouble : APFloat::Bogus, APFloat::rmNearestTiesToEven, &ignored); return ConstantFP::get(V->getContext(), Val); @@ -646,7 +647,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, case Instruction::SIToFP: if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { APInt api = CI->getValue(); - APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), true); + APFloat apf(APInt::getNullValue(DestTy->getPrimitiveSizeInBits()), + !DestTy->isPPC_FP128Ty() /* isEEEE */); (void)apf.convertFromAPInt(api, opc==Instruction::SIToFP, APFloat::rmNearestTiesToEven); @@ -867,10 +869,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1, Constant *C2) { - // No compile-time operations on this type yet. - if (C1->getType()->isPPC_FP128Ty()) - return 0; - // Handle UndefValue up front. if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) { switch (Opcode) { @@ -1273,10 +1271,6 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { assert(V1->getType() == V2->getType() && "Cannot compare values of different types!"); - // No compile-time operations on this type yet. - if (V1->getType()->isPPC_FP128Ty()) - return FCmpInst::BAD_FCMP_PREDICATE; - // Handle degenerate case quickly if (V1 == V2) return FCmpInst::FCMP_OEQ; @@ -1602,10 +1596,6 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); } - // No compile-time operations on this type yet. - if (C1->getType()->isPPC_FP128Ty()) - return 0; - // icmp eq/ne(null,GV) -> false/true if (C1->isNullValue()) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(C2)) diff --git a/lib/VMCore/DIBuilder.cpp b/lib/VMCore/DIBuilder.cpp index c331304df1..152b825523 100644 --- a/lib/VMCore/DIBuilder.cpp +++ b/lib/VMCore/DIBuilder.cpp @@ -492,7 +492,8 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - Constant::getNullValue(Type::getInt32Ty(VMContext)) + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), }; return DIType(MDNode::get(VMContext, Elts)); } diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp index 104e5da057..19cf0f5cd3 100644 --- a/lib/VMCore/DataLayout.cpp +++ b/lib/VMCore/DataLayout.cpp @@ -524,14 +524,6 @@ std::string DataLayout::getStringRepresentation() const { return OS.str(); } -unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const -{ - if (Ty->isPointerTy()) return getTypeSizeInBits(Ty); - if (Ty->isVectorTy() - && cast<VectorType>(Ty)->getElementType()->isPointerTy()) - return getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()); - return getPointerSizeInBits(0); -} uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -668,33 +660,26 @@ unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const { return Log2_32(Align); } -/// getIntPtrType - Return an integer type that is the same size or -/// greater to the pointer size for the address space. +/// getIntPtrType - Return an integer type with size at least as big as that +/// of a pointer in the given address space. 
IntegerType *DataLayout::getIntPtrType(LLVMContext &C, unsigned AddressSpace) const { return IntegerType::get(C, getPointerSizeInBits(AddressSpace)); } -/// getIntPtrType - Return an integer type that is the same size or -/// greater to the pointer size of the specific PointerType. -IntegerType *DataLayout::getIntPtrType(Type *Ty) const { - LLVMContext &C = Ty->getContext(); - // For pointers, we return the size for the specific address space. - if (Ty->isPointerTy()) return IntegerType::get(C, getTypeSizeInBits(Ty)); - // For vector of pointers, we return the size of the address space - // of the pointer type. - if (Ty->isVectorTy() && cast<VectorType>(Ty)->getElementType()->isPointerTy()) - return IntegerType::get(C, - getTypeSizeInBits(cast<VectorType>(Ty)->getElementType())); - // Otherwise return the address space for the default address space. - // An example of this occuring is that you want to get the IntPtr - // for all of the arguments in a function. However, the IntPtr - // for a non-pointer type cannot be determined by the type, so - // the default value is used. - return getIntPtrType(C, 0); +/// getIntPtrType - Return an integer (vector of integer) type with size at +/// least as big as that of a pointer of the given pointer (vector of pointer) +/// type. +Type *DataLayout::getIntPtrType(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "Expected a pointer or pointer vector type."); + unsigned NumBits = getTypeSizeInBits(Ty->getScalarType()); + IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); + if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) + return VectorType::get(IntTy, VecTy->getNumElements()); + return IntTy; } - uint64_t DataLayout::getIndexedOffset(Type *ptrTy, ArrayRef<Value *> Indices) const { Type *Ty = ptrTy; diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index e9b96d6cd2..94bd2a1563 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1399,18 +1399,6 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) { return getIndexedTypeInternal(Ptr, IdxList); } -unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) { - Type *Ty = Ptr->getType(); - - if (VectorType *VTy = dyn_cast<VectorType>(Ty)) - Ty = VTy->getElementType(); - - if (PointerType *PTy = dyn_cast<PointerType>(Ty)) - return PTy->getAddressSpace(); - - llvm_unreachable("Invalid GEP pointer type"); -} - /// hasAllZeroIndices - Return true if all of the indices of this GEP are /// zeros. If so, the result pointer and the first operand have the same /// value, just potentially different types. @@ -2120,17 +2108,6 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const { return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); } -/// @brief Determine if a cast is a no-op -bool CastInst::isNoopCast(const DataLayout &DL) const { - unsigned AS = 0; - if (getOpcode() == Instruction::PtrToInt) - AS = getOperand(0)->getType()->getPointerAddressSpace(); - else if (getOpcode() == Instruction::IntToPtr) - AS = getType()->getPointerAddressSpace(); - Type *IntPtrTy = DL.getIntPtrType(getContext(), AS); - return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); -} - /// This function determines if a pair of casts can be eliminated and what /// opcode should be used in the elimination. This assumes that there are two /// instructions like this: @@ -2141,7 +2118,8 @@ bool CastInst::isNoopCast(const DataLayout &DL) const { /// If no such cast is permited, the function returns 0. 
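Stepping back to the new DataLayout::getIntPtrType overload above: a pointer type now maps to an integer of that pointer's width, and a vector of pointers to a vector of such integers with the same element count. A hypothetical usage sketch, assuming the header locations in this tree and the layout-string constructor, with a 64-bit pointer layout:

#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("e-p:64:64:64");              // 64-bit pointers
  Type *I8Ptr   = Type::getInt8PtrTy(Ctx);    // i8*
  Type *I8PtrX4 = VectorType::get(I8Ptr, 4);  // <4 x i8*>
  Type *T1 = DL.getIntPtrType(I8Ptr);         // i64
  Type *T2 = DL.getIntPtrType(I8PtrX4);       // <4 x i64>
  return (T1->isIntegerTy(64) && T2->isVectorTy()) ? 0 : 1;
}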
unsigned CastInst::isEliminableCastPair( Instruction::CastOps firstOp, Instruction::CastOps secondOp, - Type *SrcTy, Type *MidTy, Type *DstTy, Type *IntPtrTy) { + Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, + Type *DstIntPtrTy) { // Define the 144 possibilities for these two cast instructions. The values // in this matrix determine what to do in a given situation and select the // case in the switch below. The rows correspond to firstOp, the columns @@ -2244,9 +2222,9 @@ unsigned CastInst::isEliminableCastPair( return 0; case 7: { // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size - if (!IntPtrTy) + if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy) return 0; - unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits(); unsigned MidSize = MidTy->getScalarSizeInBits(); if (MidSize >= PtrSize) return Instruction::BitCast; @@ -2285,9 +2263,9 @@ unsigned CastInst::isEliminableCastPair( return 0; case 13: { // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize - if (!IntPtrTy) + if (!MidIntPtrTy) return 0; - unsigned PtrSize = IntPtrTy->getScalarSizeInBits(); + unsigned PtrSize = MidIntPtrTy->getScalarSizeInBits(); unsigned SrcSize = SrcTy->getScalarSizeInBits(); unsigned DstSize = DstTy->getScalarSizeInBits(); if (SrcSize <= PtrSize && SrcSize == DstSize) diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index 2b9b0f258c..8b9865152e 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -9,7 +9,6 @@ LEVEL = ../.. LIBRARYNAME = LLVMCore BUILD_ARCHIVE = 1 -REQUIRES_RTTI = 1 BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/Intrinsics.gen diff --git a/lib/VMCore/TargetTransformInfo.cpp b/lib/VMCore/TargetTransformInfo.cpp index 219d24c1f5..e91c29c456 100644 --- a/lib/VMCore/TargetTransformInfo.cpp +++ b/lib/VMCore/TargetTransformInfo.cpp @@ -25,7 +25,7 @@ TargetTransformInfo::TargetTransformInfo() : ImmutablePass(ID) { "Tool did not specify a TargetTransformInfo to use?"); } -INITIALIZE_PASS(TargetTransformInfo, "TargetTransformInfo", +INITIALIZE_PASS(TargetTransformInfo, "targettransforminfo", "Target Transform Info", false, true) char TargetTransformInfo::ID = 0; diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 54146e118c..1656ab2cab 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -47,35 +47,17 @@ Type *Type::getScalarType() { return this; } +const Type *Type::getScalarType() const { + if (const VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getElementType(); + return this; +} + /// isIntegerTy - Return true if this is an IntegerType of the specified width. bool Type::isIntegerTy(unsigned Bitwidth) const { return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; } -/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of -/// integer types. -/// -bool Type::isIntOrIntVectorTy() const { - if (isIntegerTy()) - return true; - if (getTypeID() != Type::VectorTyID) return false; - - return cast<VectorType>(this)->getElementType()->isIntegerTy(); -} - -/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types. 
-/// -bool Type::isFPOrFPVectorTy() const { - if (getTypeID() == Type::HalfTyID || getTypeID() == Type::FloatTyID || - getTypeID() == Type::DoubleTyID || - getTypeID() == Type::FP128TyID || getTypeID() == Type::X86_FP80TyID || - getTypeID() == Type::PPC_FP128TyID) - return true; - if (getTypeID() != Type::VectorTyID) return false; - - return cast<VectorType>(this)->getElementType()->isFloatingPointTy(); -} - // canLosslesslyBitCastTo - Return true if this type can be converted to // 'Ty' without any reinterpretation of bits. For example, i8* to i32*. // @@ -233,12 +215,7 @@ unsigned Type::getVectorNumElements() const { } unsigned Type::getPointerAddressSpace() const { - if (isPointerTy()) - return cast<PointerType>(this)->getAddressSpace(); - if (isVectorTy()) - return getSequentialElementType()->getPointerAddressSpace(); - llvm_unreachable("Should never reach here!"); - return 0; + return cast<PointerType>(getScalarType())->getAddressSpace(); } diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac index bd0b16a4a6..8012c23412 100644 --- a/projects/sample/autoconf/configure.ac +++ b/projects/sample/autoconf/configure.ac @@ -304,8 +304,8 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - mips-*) llvm_cv_target_arch="Mips" ;; - mipsel-*) llvm_cv_target_arch="Mips" ;; + mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; + mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; diff --git a/projects/sample/configure b/projects/sample/configure index 0e341fbb34..3baa1a7e16 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -3845,8 +3845,8 @@ else sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; arm*-*) llvm_cv_target_arch="ARM" ;; - mips-*) llvm_cv_target_arch="Mips" ;; - mipsel-*) llvm_cv_target_arch="Mips" ;; + mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; + mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; hexagon-*) llvm_cv_target_arch="Hexagon" ;; diff --git a/runtime/libprofile/CommonProfiling.c b/runtime/libprofile/CommonProfiling.c index acc17ce11e..8f4119c2c6 100644 --- a/runtime/libprofile/CommonProfiling.c +++ b/runtime/libprofile/CommonProfiling.c @@ -28,14 +28,35 @@ static char *SavedArgs = 0; static unsigned SavedArgsLength = 0; +static const char *SavedEnvVar = 0; static const char *OutputFilename = "llvmprof.out"; +/* check_environment_variable - Check to see if the LLVMPROF_OUTPUT environment + * variable is set. If it is then save it and set OutputFilename. + */ +static void check_environment_variable(void) { + const char *EnvVar; + if (SavedEnvVar) return; /* Guarantee that we can't leak memory. */ + + if ((EnvVar = getenv("LLVMPROF_OUTPUT")) != NULL) { + /* The string that getenv returns is allowed to be statically allocated, + * which means it may be changed by future calls to getenv, so copy it. + */ + SavedEnvVar = strdup(EnvVar); + OutputFilename = SavedEnvVar; + } +} + /* save_arguments - Save argc and argv as passed into the program for the file * we output. + * If either the LLVMPROF_OUTPUT environment variable or the -llvmprof-output + * command line argument is set then change OutputFilename to the provided + * value.
The command line argument value overrides the environment variable. */ int save_arguments(int argc, const char **argv) { unsigned Length, i; + if (!SavedEnvVar && !SavedArgs) check_environment_variable(); if (SavedArgs || !argv) return argc; /* This can be called multiple times */ /* Check to see if there are any arguments passed into the program for the @@ -54,6 +75,7 @@ int save_arguments(int argc, const char **argv) { puts("-llvmprof-output requires a filename argument!"); else { OutputFilename = strdup(argv[1]); + if (SavedEnvVar) { free((void *)SavedEnvVar); SavedEnvVar = 0; } memmove((char**)&argv[1], &argv[2], (argc-1)*sizeof(char*)); --argc; } diff --git a/test/Analysis/BasicAA/nocapture.ll b/test/Analysis/BasicAA/nocapture.ll index a8658ec801..ffc0a09a07 100644 --- a/test/Analysis/BasicAA/nocapture.ll +++ b/test/Analysis/BasicAA/nocapture.ll @@ -13,3 +13,24 @@ define i32 @test2() { ret i32 %c } +declare void @test3(i32** %p, i32* %q) nounwind + +define i32 @test4(i32* noalias nocapture %p) nounwind { +; CHECK: call void @test3 +; CHECK: store i32 0, i32* %p +; CHECK: store i32 1, i32* %x +; CHECK: %y = load i32* %p +; CHECK: ret i32 %y +entry: + %q = alloca i32* + ; Here test3 might store %p to %q. This doesn't violate %p's nocapture + ; attribute since the copy doesn't outlive the function. + call void @test3(i32** %q, i32* %p) nounwind + store i32 0, i32* %p + %x = load i32** %q + ; This store might write to %p and so we can't eliminate the subsequent + ; load + store i32 1, i32* %x + %y = load i32* %p + ret i32 %y +} diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll new file mode 100644 index 0000000000..58b4a7c426 --- /dev/null +++ b/test/Analysis/CostModel/X86/arith.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @add(i32 %arg) { + ;CHECK: cost of 1 {{.*}} add + %A = add <4 x i32> undef, undef + ;CHECK: cost of 4 {{.*}} add + %B = add <8 x i32> undef, undef + ;CHECK: cost of 1 {{.*}} add + %C = add <2 x i64> undef, undef + ;CHECK: cost of 4 {{.*}} add + %D = add <4 x i64> undef, undef + ;CHECK: cost of 1 {{.*}} ret + ret i32 undef +} + + +define i32 @xor(i32 %arg) { + ;CHECK: cost of 1 {{.*}} xor + %A = xor <4 x i32> undef, undef + ;CHECK: cost of 1 {{.*}} xor + %B = xor <8 x i32> undef, undef + ;CHECK: cost of 1 {{.*}} xor + %C = xor <2 x i64> undef, undef + ;CHECK: cost of 1 {{.*}} xor + %D = xor <4 x i64> undef, undef + ;CHECK: cost of 1 {{.*}} ret + ret i32 undef +} + + +define i32 @fmul(i32 %arg) { + ;CHECK: cost of 1 {{.*}} fmul + %A = fmul <4 x float> undef, undef + ;CHECK: cost of 1 {{.*}} fmul + %B = fmul <8 x float> undef, undef + ret i32 undef +} diff --git a/test/Analysis/CostModel/X86/insert-extract-at-zero.ll b/test/Analysis/CostModel/X86/insert-extract-at-zero.ll new file mode 100644 index 0000000000..eea5b601d0 --- /dev/null +++ b/test/Analysis/CostModel/X86/insert-extract-at-zero.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + 
+define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) { + ;CHECK: cost of 0 {{.*}} extract + %A = extractelement <4 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %B = extractelement <4 x i32> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %C = extractelement <4 x float> undef, i32 1 + + ;CHECK: cost of 0 {{.*}} extract + %D = extractelement <8 x float> undef, i32 0 + ;CHECK: cost of 1 {{.*}} extract + %E = extractelement <8 x float> undef, i32 1 + + ;CHECK: cost of 1 {{.*}} extract + %F = extractelement <8 x float> undef, i32 %arg + + ;CHECK: cost of 0 {{.*}} insert + %G = insertelement <4 x float> undef, float %fl, i32 0 + ;CHECK: cost of 1 {{.*}} insert + %H = insertelement <4 x float> undef, float %fl, i32 1 + ;CHECK: cost of 1 {{.*}} insert + %I = insertelement <4 x i32> undef, i32 %arg, i32 0 + + ;CHECK: cost of 0 {{.*}} insert + %J = insertelement <4 x double> undef, double undef, i32 0 + + ret i32 0 +} diff --git a/test/Analysis/CostModel/X86/lit.local.cfg b/test/Analysis/CostModel/X86/lit.local.cfg new file mode 100644 index 0000000000..a8ad0f1a28 --- /dev/null +++ b/test/Analysis/CostModel/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll new file mode 100644 index 0000000000..260a60676a --- /dev/null +++ b/test/Analysis/CostModel/X86/loop_v2.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +define i32 @foo(i32* nocapture %A) nounwind uwtable readonly ssp { +vector.ph: + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ] + %0 = getelementptr inbounds i32* %A, i64 %index + %1 = bitcast i32* %0 to <2 x i32>* + %2 = load <2 x i32>* %1, align 4 + %3 = sext <2 x i32> %2 to <2 x i64> + ;CHECK: cost of 1 {{.*}} extract + %4 = extractelement <2 x i64> %3, i32 0 + %5 = getelementptr inbounds i32* %A, i64 %4 + ;CHECK: cost of 1 {{.*}} extract + %6 = extractelement <2 x i64> %3, i32 1 + %7 = getelementptr inbounds i32* %A, i64 %6 + %8 = load i32* %5, align 4, !tbaa !0 + ;CHECK: cost of 1 {{.*}} insert + %9 = insertelement <2 x i32> undef, i32 %8, i32 0 + %10 = load i32* %7, align 4, !tbaa !0 + ;CHECK: cost of 1 {{.*}} insert + %11 = insertelement <2 x i32> %9, i32 %10, i32 1 + %12 = add nsw <2 x i32> %11, %vec.phi + %index.next = add i64 %index, 2 + %13 = icmp eq i64 %index.next, 192 + br i1 %13, label %for.end, label %vector.body + +for.end: ; preds = %vector.body + %14 = extractelement <2 x i32> %12, i32 0 + %15 = extractelement <2 x i32> %12, i32 1 + %16 = add i32 %14, %15 + ret i32 %16 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/CostModel/X86/tiny.ll b/test/Analysis/CostModel/X86/tiny.ll new file mode 100644 index 0000000000..cc7b443a7d --- /dev/null +++ b/test/Analysis/CostModel/X86/tiny.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -cost-model 
-analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: cost of 1 {{.*}} add +;CHECK: cost of 1 {{.*}} ret +define i32 @no_info(i32 %arg) { + %e = add i32 %arg, %arg + ret i32 %e +} diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll new file mode 100644 index 0000000000..7919a9ca9a --- /dev/null +++ b/test/Analysis/CostModel/X86/vectorized-loop.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define i32 @foo(i32* noalias nocapture %A, i32* noalias nocapture %B, i32 %start, i32 %end) nounwind uwtable ssp { +entry: + ;CHECK: cost of 1 {{.*}} icmp + %cmp7 = icmp slt i32 %start, %end + br i1 %cmp7, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + ;CHECK: cost of 1 {{.*}} sext + %0 = sext i32 %start to i64 + %1 = sub i32 %end, %start + %2 = zext i32 %1 to i64 + %end.idx = add i64 %2, %0 + ;CHECK: cost of 1 {{.*}} add + %n.vec = and i64 %2, 4294967288 + %end.idx.rnd.down = add i64 %n.vec, %0 + ;CHECK: cost of 1 {{.*}} icmp + %cmp.zero = icmp eq i64 %n.vec, 0 + br i1 %cmp.zero, label %middle.block, label %vector.body + +vector.body: ; preds = %for.body.lr.ph, %vector.body + %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ] + %3 = add i64 %index, 2 + %4 = getelementptr inbounds i32* %B, i64 %3 + ;CHECK: cost of 0 {{.*}} bitcast + %5 = bitcast i32* %4 to <8 x i32>* + ;CHECK: cost of 1 {{.*}} load + %6 = load <8 x i32>* %5, align 4 + ;CHECK: cost of 4 {{.*}} mul + %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> + %8 = getelementptr inbounds i32* %A, i64 %index + %9 = bitcast i32* %8 to <8 x i32>* + %10 = load <8 x i32>* %9, align 4 + ;CHECK: cost of 4 {{.*}} add + %11 = add nsw <8 x i32> %10, %7 + ;CHECK: cost of 1 {{.*}} store + store <8 x i32> %11, <8 x i32>* %9, align 4 + %index.next = add i64 %index, 8 + %12 = icmp eq i64 %index.next, %end.idx.rnd.down + ;CHECK: cost of 1 {{.*}} br + br i1 %12, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body, %for.body.lr.ph + %cmp.n = icmp eq i64 %end.idx, %end.idx.rnd.down + br i1 %cmp.n, label %for.end, label %for.body + +for.body: ; preds = %middle.block, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ] + %13 = add nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %B, i64 %13 + ;CHECK: cost of 1 {{.*}} load + %14 = load i32* %arrayidx, align 4, !tbaa !0 + ;CHECK: cost of 1 {{.*}} mul + %mul = mul nsw i32 %14, 5 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv + ;CHECK: cost of 1 {{.*}} load + %15 = load i32* %arrayidx2, align 4, !tbaa !0 + %add3 = add nsw i32 %15, %mul + store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + ;CHECK: cost of 0 {{.*}} trunc + %16 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %16, %end + ;CHECK: cost of 1 {{.*}} br + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = 
%middle.block, %for.body, %entry + ;CHECK: cost of 1 {{.*}} ret + ret i32 undef +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/LoopDependenceAnalysis/lit.local.cfg b/test/Analysis/CostModel/lit.local.cfg index 19eebc0ac7..19eebc0ac7 100644 --- a/test/Analysis/LoopDependenceAnalysis/lit.local.cfg +++ b/test/Analysis/CostModel/lit.local.cfg diff --git a/test/Analysis/CostModel/no_info.ll b/test/Analysis/CostModel/no_info.ll new file mode 100644 index 0000000000..d20d56b79a --- /dev/null +++ b/test/Analysis/CostModel/no_info.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -cost-model -analyze | FileCheck %s + +; The cost model does not have any target information so it can't make a decision. +; Notice that OPT does not read the triple information from the module itself, only through the command line. + +; This info ignored: +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: Unknown cost {{.*}} add +;CHECK: Unknown cost {{.*}} ret +define i32 @no_info(i32 %arg) { + %e = add i32 %arg, %arg + ret i32 %e +} diff --git a/test/Analysis/LoopDependenceAnalysis/alias.ll b/test/Analysis/LoopDependenceAnalysis/alias.ll deleted file mode 100644 index 78d0bf4fee..0000000000 --- a/test/Analysis/LoopDependenceAnalysis/alias.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s - -;; x[5] = x[6] // with x being a pointer passed as argument - -define void @f1(i32* nocapture %xptr) nounwind { -entry: - %x.ld.addr = getelementptr i32* %xptr, i64 6 - %x.st.addr = getelementptr i32* %xptr, i64 5 - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x = load i32* %x.ld.addr - store i32 %x, i32* %x.st.addr -; CHECK: 0,1: dep - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; x[5] = x[6] // with x being an array on the stack - -define void @foo(...) nounwind { -entry: - %xptr = alloca [256 x i32], align 4 - %x.ld.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 6 - %x.st.addr = getelementptr [256 x i32]* %xptr, i64 0, i64 5 - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x = load i32* %x.ld.addr - store i32 %x, i32* %x.st.addr -; CHECK: 0,1: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/test/Analysis/LoopDependenceAnalysis/siv-strong.ll b/test/Analysis/LoopDependenceAnalysis/siv-strong.ll deleted file mode 100644 index 401e466d66..0000000000 --- a/test/Analysis/LoopDependenceAnalysis/siv-strong.ll +++ /dev/null @@ -1,110 +0,0 @@ -; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s - -@x = common global [256 x i32] zeroinitializer, align 4 -@y = common global [256 x i32] zeroinitializer, align 4 - -;; for (i = 0; i < 256; i++) -;; x[i] = x[i] + y[i] - -define void @f1(...) 
nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %x = load i32* %x.addr ; 0 - %y = load i32* %y.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; for (i = 0; i < 256; i++) -;; x[i+1] = x[i] + y[i] - -define void @f2(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %i.next = add i64 %i, 1 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.next - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; for (i = 0; i < 10; i++) -;; x[i+20] = x[i] + y[i] - -define void @f3(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %i.20 = add i64 %i, 20 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.20 - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 10 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; for (i = 0; i < 10; i++) -;; x[10*i+1] = x[10*i] + y[i] - -define void @f4(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %i.10 = mul i64 %i, 10 - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.10 - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10 - %i.10.1 = add i64 %i.10, 1 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.10.1 - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 10 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll deleted file mode 100644 index 9d0128c5fe..0000000000 --- a/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll +++ /dev/null @@ -1,118 +0,0 @@ -; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s - -@x = common global [256 x i32] zeroinitializer, align 4 -@y = common global [256 x i32] zeroinitializer, align 4 - -;; for (i = 0; i < 256; i++) -;; x[i] = x[255 - i] + y[i] - -define void @f1(...) 
nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %i.255 = sub i64 255, %i - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; for (i = 0; i < 100; i++) -;; x[i] = x[255 - i] + y[i] - -define void @f2(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %i.255 = sub i64 255, %i - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.255 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 100 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; // the first iteration (i=0) leads to an out-of-bounds access of x. as the -;; // result of this access is undefined, _any_ dependence result is safe. -;; for (i = 0; i < 256; i++) -;; x[i] = x[256 - i] + y[i] - -define void @f3(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %i.256 = sub i64 0, %i - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; // slightly contrived but valid IR for the following loop, where all -;; // accesses in all iterations are within bounds. while this example's first -;; // (ZIV-)subscript is (0, 1), accesses are dependent. -;; for (i = 1; i < 256; i++) -;; x[i] = x[256 - i] + y[i] - -define void @f4(...) 
nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %i.1 = add i64 1, %i - %i.256 = sub i64 -1, %i - %y.ld.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i.1 - %x.ld.addr = getelementptr [256 x i32]* @x, i64 1, i64 %i.256 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i.1 - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.ld.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.st.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll b/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll deleted file mode 100644 index 1c5ae4c490..0000000000 --- a/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s - -@x = common global [256 x i32] zeroinitializer, align 4 -@y = common global [256 x i32] zeroinitializer, align 4 - -;; for (i = 0; i < 256; i++) -;; x[i] = x[42] + y[i] - -define void @f1(...) nounwind { -entry: - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 42 - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; for (i = 0; i < 250; i++) -;; x[i] = x[255] + y[i] - -define void @f2(...) nounwind { -entry: - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 255 - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x.addr = getelementptr [256 x i32]* @x, i64 0, i64 %i - %y.addr = getelementptr [256 x i32]* @y, i64 0, i64 %i - %x = load i32* %x.ld.addr ; 0 - %y = load i32* %y.addr ; 1 - %r = add i32 %y, %x - store i32 %r, i32* %x.addr ; 2 -; CHECK: 0,2: dep -; CHECK: 1,2: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 250 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/test/Analysis/LoopDependenceAnalysis/ziv.ll b/test/Analysis/LoopDependenceAnalysis/ziv.ll deleted file mode 100644 index 645ae7f152..0000000000 --- a/test/Analysis/LoopDependenceAnalysis/ziv.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: opt < %s -analyze -basicaa -lda | FileCheck %s - -@x = common global [256 x i32] zeroinitializer, align 4 - -;; x[5] = x[6] - -define void @f1(...) 
nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6) - store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 5) -; CHECK: 0,1: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; x[c] = x[c+1] // with c being a loop-invariant constant - -define void @f2(i64 %c0) nounwind { -entry: - %c1 = add i64 %c0, 1 - %x.ld.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c0 - %x.st.addr = getelementptr [256 x i32]* @x, i64 0, i64 %c1 - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x = load i32* %x.ld.addr - store i32 %x, i32* %x.st.addr -; CHECK: 0,1: ind - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -;; x[6] = x[6] - -define void @f3(...) nounwind { -entry: - br label %for.body - -for.body: - %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] - %x = load i32* getelementptr ([256 x i32]* @x, i32 0, i64 6) - store i32 %x, i32* getelementptr ([256 x i32]* @x, i32 0, i64 6) -; CHECK: 0,1: dep - %i.next = add i64 %i, 1 - %exitcond = icmp eq i64 %i.next, 256 - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index 9df823ab97..c812836957 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -2,7 +2,7 @@ ; ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null ; REQUIRES: loadable_module -; XFAIL: lto +; XFAIL: lto_on_osx define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll index 98c79ee03a..6dc9574bbe 100644 --- a/test/BugPoint/metadata.ll +++ b/test/BugPoint/metadata.ll @@ -1,7 +1,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module -; XFAIL: lto +; XFAIL: lto_on_osx ; Bugpoint should keep the call's metadata attached to the call. diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 13aa9c5a65..5a45f846e1 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,7 +1,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module -; XFAIL: lto +; XFAIL: lto_on_osx ; Test to make sure that arguments are removed from the function if they are ; unnecessary. And clean up any types that that frees up too. 
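As a concrete picture of what the bugpoint reduction above checks, here is a minimal sketch; the function and type names are invented, and the IR is illustrative rather than taken from the test. Bugpoint tries to drop arguments the crash does not depend on, which in turn lets types referenced only through those arguments disappear from the module.

%struct.unused_by_crash = type { i32, i32 }

; Before reduction: the first argument is dead with respect to the crash,
; but it keeps %struct.unused_by_crash alive in the module.
define i32 @victim(%struct.unused_by_crash* %dead, i32 %x) {
entry:
  %r = add i32 %x, 1          ; assume the crash only depends on %x
  ret i32 %r
}

; After a successful reduction bugpoint would emit roughly
;   define i32 @victim(i32 %x)
; and the now-unreferenced struct type can be cleaned up as well.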
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4f099a922a..e10a532341 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -21,7 +21,12 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests" llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-diff llvm-dis llvm-extract llvm-dwarfdump - llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj + llvm-link + llvm-mc + llvm-mcmarkup + llvm-nm + llvm-objdump + llvm-readobj macho-dump opt profile_rt-shared FileCheck count not diff --git a/test/CodeGen/ARM/call-noret-minsize.ll b/test/CodeGen/ARM/call-noret-minsize.ll new file mode 100644 index 0000000000..35490ac69b --- /dev/null +++ b/test/CodeGen/ARM/call-noret-minsize.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=T2 +; rdar://12348580 + +define void @t1() noreturn minsize nounwind ssp { +entry: +; ARM: t1: +; ARM: bl _bar + +; SWIFT: t1: +; SWIFT: bl _bar + +; T2: t1: +; T2: blx _bar + tail call void @bar() noreturn nounwind + unreachable +} + +define void @t2() noreturn minsize nounwind ssp { +entry: +; ARM: t2: +; ARM: bl _t1 + +; SWIFT: t2: +; SWIFT: bl _t1 + +; T2: t2: +; T2: bl _t1 + tail call void @t1() noreturn nounwind + unreachable +} + +declare void @bar() noreturn diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll index 238ba24a79..14511ad5ce 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -289,3 +289,31 @@ bb: %tmp18 = insertvalue %struct.wombat.5 %tmp17, <4 x float> undef, 3, 0 ret %struct.wombat.5 %tmp18 } + +; CHECK: adjustCopiesBackFrom +; The shuffle in if.else3 must be preserved even though adjustCopiesBackFrom +; is tempted to remove it. 
+; CHECK: %if.else3 +; CHECK: vorr d +define internal void @adjustCopiesBackFrom(<2 x i64>* noalias nocapture sret %agg.result, <2 x i64> %in) { +entry: + %0 = extractelement <2 x i64> %in, i32 0 + %cmp = icmp slt i64 %0, 1 + %.in = select i1 %cmp, <2 x i64> <i64 0, i64 undef>, <2 x i64> %in + %1 = extractelement <2 x i64> %in, i32 1 + %cmp1 = icmp slt i64 %1, 1 + br i1 %cmp1, label %if.then2, label %if.else3 + +if.then2: ; preds = %entry + %2 = insertelement <2 x i64> %.in, i64 0, i32 1 + br label %if.end4 + +if.else3: ; preds = %entry + %3 = shufflevector <2 x i64> %.in, <2 x i64> %in, <2 x i32> <i32 0, i32 3> + br label %if.end4 + +if.end4: ; preds = %if.else3, %if.then2 + %result.2 = phi <2 x i64> [ %2, %if.then2 ], [ %3, %if.else3 ] + store <2 x i64> %result.2, <2 x i64>* %agg.result, align 128 + ret void +} diff --git a/test/CodeGen/ARM/integer_insertelement.ll b/test/CodeGen/ARM/integer_insertelement.ll index 4f2d7e3f73..1d72afefb5 100644 --- a/test/CodeGen/ARM/integer_insertelement.ll +++ b/test/CodeGen/ARM/integer_insertelement.ll @@ -6,7 +6,7 @@ ; CHECK: @f ; CHECK-NOT: vorr d -; CHECK: vmov s +; CHECK: vmov.32 d ; CHECK-NOT: vorr d ; CHECK: mov pc, lr define <4 x i32> @f(<4 x i32> %in) { diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index e224bdfe25..f404eb8be5 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -74,6 +74,39 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ret <16 x i8> %tmp3 } +define <16 x i8> @test_vextq_undef_op2(<16 x i8> %a) nounwind { +;CHECK: test_vextq_undef_op2: +;CHECK: vext +entry: + %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1> + ret <16 x i8> %tmp1 +} + +define <8 x i8> @test_vextd_undef_op2(<8 x i8> %a) nounwind { +;CHECK: test_vextd_undef_op2: +;CHECK: vext +entry: + %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1> + ret <8 x i8> %tmp1 +} + + +define <16 x i8> @test_vextq_undef_op2_undef(<16 x i8> %a) nounwind { +;CHECK: test_vextq_undef_op2_undef: +;CHECK: vext +entry: + %tmp1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1> + ret <16 x i8> %tmp1 +} + +define <8 x i8> @test_vextd_undef_op2_undef(<8 x i8> %a) nounwind { +;CHECK: test_vextd_undef_op2_undef: +;CHECK: vext +entry: + %tmp1 = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1> + ret <8 x i8> %tmp1 +} + ; Tests for ReconstructShuffle function. Indices have to be carefully ; chosen to reach lowering phase as a BUILD_VECTOR. 
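To make the preceding comment concrete: a mask that gathers non-contiguous lanes from both operands matches none of the single-instruction NEON permutes (VEXT needs a contiguous window across the concatenated inputs; VREV, VZIP, and VTRN have fixed patterns), so such a shuffle reaches instruction selection as a BUILD_VECTOR of extracted elements, which is the case ReconstructShuffle handles. A minimal sketch, with a made-up function name rather than anything from vext.ll:

define <4 x i16> @scattered_lanes(<8 x i16> %a, <8 x i16> %b) nounwind {
entry:
  ; Lanes 0 and 7 of %a plus lanes 1 and 6 of %b (mask indices 9 and 14
  ; select from the second operand); no contiguous VEXT window covers this.
  %s = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 7, i32 9, i32 14>
  ret <4 x i16> %s
}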
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 2ed65c9aee..c9ce3b7450 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -200,7 +200,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind { define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind { ;CHECK: vsetQ_lane32: -;CHECK: vmov s +;CHECK: vmov.32 d{{.*}}[1], r1 %tmp1 = load <4 x i32>* %A %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1 ret <4 x i32> %tmp2 diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll new file mode 100644 index 0000000000..731edae43c --- /dev/null +++ b/test/CodeGen/Mips/alloca16.ll @@ -0,0 +1,75 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@iiii = global i32 25, align 4 +@jjjj = global i32 35, align 4 +@kkkk = global i32 100, align 4 +@t = global i32 25, align 4 +@riii = common global i32 0, align 4 +@rjjj = common global i32 0, align 4 +@rkkk = common global i32 0, align 4 + +define void @temp(i32 %foo) nounwind { +entry: + %foo.addr = alloca i32, align 4 + store i32 %foo, i32* %foo.addr, align 4 + %0 = load i32* %foo.addr, align 4 + store i32 %0, i32* @t, align 4 + ret void +} + +define void @test() nounwind { +entry: +; 16: .frame $16,24,$ra +; 16: save $ra, $s0, $s1, 24 +; 16: move $16, $sp +; 16: move ${{[0-9]+}}, $sp +; 16: subu $[[REGISTER:[0-9]+]], ${{[0-9]+}}, ${{[0-9]+}} +; 16: move $sp, $[[REGISTER]] + %sssi = alloca i32, align 4 + %ip = alloca i32*, align 4 + %sssj = alloca i32, align 4 + %0 = load i32* @iiii, align 4 + store i32 %0, i32* %sssi, align 4 + %1 = load i32* @kkkk, align 4 + %mul = mul nsw i32 %1, 100 + %2 = alloca i8, i32 %mul + %3 = bitcast i8* %2 to i32* + store i32* %3, i32** %ip, align 4 + %4 = load i32* @jjjj, align 4 + store i32 %4, i32* %sssj, align 4 + %5 = load i32* @jjjj, align 4 + %6 = load i32* @iiii, align 4 + %7 = load i32** %ip, align 4 + %arrayidx = getelementptr inbounds i32* %7, i32 %6 + store i32 %5, i32* %arrayidx, align 4 + %8 = load i32* @kkkk, align 4 + %9 = load i32* @jjjj, align 4 + %10 = load i32** %ip, align 4 + %arrayidx1 = getelementptr inbounds i32* %10, i32 %9 + store i32 %8, i32* %arrayidx1, align 4 + %11 = load i32* @iiii, align 4 + %12 = load i32* @kkkk, align 4 + %13 = load i32** %ip, align 4 + %arrayidx2 = getelementptr inbounds i32* %13, i32 %12 + store i32 %11, i32* %arrayidx2, align 4 + %14 = load i32** %ip, align 4 + %arrayidx3 = getelementptr inbounds i32* %14, i32 25 + %15 = load i32* %arrayidx3, align 4 + store i32 %15, i32* @riii, align 4 + %16 = load i32** %ip, align 4 + %arrayidx4 = getelementptr inbounds i32* %16, i32 35 + %17 = load i32* %arrayidx4, align 4 + store i32 %17, i32* @rjjj, align 4 + %18 = load i32** %ip, align 4 + %arrayidx5 = getelementptr inbounds i32* %18, i32 100 + %19 = load i32* %arrayidx5, align 4 + store i32 %19, i32* @rkkk, align 4 + %20 = load i32* @t, align 4 + %21 = load i32** %ip, align 4 + %arrayidx6 = getelementptr inbounds i32* %21, i32 %20 + %22 = load i32* %arrayidx6, align 4 +; 16: save 16 + call void @temp(i32 %22) +; 16: restore 16 + ret void +} diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll index 050689dcea..819f258c2a 100644 --- a/test/CodeGen/Mips/atomic.ll +++ b/test/CodeGen/Mips/atomic.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mipsel < %s | FileCheck %s +; RUN: llc -march=mipsel --disable-machine-licm < %s | FileCheck %s @x = common global i32 0, align 4 @@ -181,8 +181,9 @@ entry: ; CHECK: $[[BB0:[A-Z_0-9]+]]: 
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) +; CHECK: and $[[R18:[0-9]+]], $[[R9]], $[[R6]] ; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] -; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R9]] +; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R18]] ; CHECK: sc $[[R14]], 0($[[R2]]) ; CHECK: beq $[[R14]], $zero, $[[BB0]] diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll new file mode 100644 index 0000000000..b9c3804e0d --- /dev/null +++ b/test/CodeGen/Mips/atomicops.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@.str = private unnamed_addr constant [8 x i8] c"%d, %d\0A\00", align 1 + +define i32 @foo(i32* %mem, i32 %val, i32 %c) nounwind { +entry: + %0 = atomicrmw add i32* %mem, i32 %val seq_cst + %add = add nsw i32 %0, %c + ret i32 %add +; 16: foo: +; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}}) +; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}}) +} + +define i32 @main() nounwind { +entry: + %x = alloca i32, align 4 + store volatile i32 0, i32* %x, align 4 + %0 = atomicrmw add i32* %x, i32 1 seq_cst + %add.i = add nsw i32 %0, 2 + %1 = load volatile i32* %x, align 4 + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind + %2 = cmpxchg i32* %x, i32 1, i32 2 seq_cst + %3 = load volatile i32* %x, align 4 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind + %4 = atomicrmw xchg i32* %x, i32 1 seq_cst + %5 = load volatile i32* %x, align 4 + %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind +; 16: main: +; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}}) +; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}}) +; 16: lw ${{[0-9]+}}, %call16(__sync_val_compare_and_swap_4)(${{[0-9]+}}) +; 16: lw ${{[0-9]+}}, %call16(__sync_lock_test_and_set_4)(${{[0-9]+}}) + + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + + diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll index 1b2fbc8932..2fdb736dc8 100644 --- a/test/CodeGen/Mips/brdelayslot.ll +++ b/test/CodeGen/Mips/brdelayslot.ll @@ -19,3 +19,19 @@ entry: } declare void @foo2(i32) + +; Check that cvt.d.w goes into jalr's delay slot. 
+; +define void @foo3(i32 %a) nounwind { +entry: +; Default: foo3: +; Default: jalr +; Default: cvt.d.w + + %conv = sitofp i32 %a to double + tail call void @foo4(double %conv) nounwind + ret void +} + +declare void @foo4(double) + diff --git a/test/CodeGen/Mips/brind.ll b/test/CodeGen/Mips/brind.ll new file mode 100644 index 0000000000..4c591fa1bb --- /dev/null +++ b/test/CodeGen/Mips/brind.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@main.L = internal unnamed_addr constant [5 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), i8* blockaddress(@main, %L3), i8* blockaddress(@main, %L4), i8* null], align 4 +@str = private unnamed_addr constant [2 x i8] c"A\00" +@str5 = private unnamed_addr constant [2 x i8] c"B\00" +@str6 = private unnamed_addr constant [2 x i8] c"C\00" +@str7 = private unnamed_addr constant [2 x i8] c"D\00" +@str8 = private unnamed_addr constant [2 x i8] c"E\00" + +define i32 @main() nounwind { +entry: + %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0)) + br label %L1 + +L1: ; preds = %entry, %L3 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %L3 ] + %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str5, i32 0, i32 0)) + br label %L2 + +L2: ; preds = %L1, %L3 + %i.1 = phi i32 [ %i.0, %L1 ], [ %inc, %L3 ] + %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str6, i32 0, i32 0)) + br label %L3 + +L3: ; preds = %L2, %L3 + %i.2 = phi i32 [ %i.1, %L2 ], [ %inc, %L3 ] + %puts7 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str7, i32 0, i32 0)) + %inc = add i32 %i.2, 1 + %arrayidx = getelementptr inbounds [5 x i8*]* @main.L, i32 0, i32 %i.2 + %0 = load i8** %arrayidx, align 4 + indirectbr i8* %0, [label %L1, label %L2, label %L3, label %L4] +; 16: jrc ${{[0-9]+}} +L4: ; preds = %L3 + %puts8 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str8, i32 0, i32 0)) + ret i32 0 +} + +declare i32 @puts(i8* nocapture) nounwind + + diff --git a/test/CodeGen/Mips/check-noat.ll b/test/CodeGen/Mips/check-noat.ll new file mode 100644 index 0000000000..bfeff677b3 --- /dev/null +++ b/test/CodeGen/Mips/check-noat.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +define void @f() nounwind readnone { +entry: +; CHECK: f: +; CHECK: .set noat +; CHECK: .set at + + ret void +} + diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll index bee93accd4..aee58b650e 100644 --- a/test/CodeGen/Mips/helloworld.ll +++ b/test/CodeGen/Mips/helloworld.ll @@ -24,10 +24,10 @@ entry: ; C1: addiu ${{[0-9]+}}, %lo($.str) ; C2: move $25, ${{[0-9]+}} ; C1: move $gp, ${{[0-9]+}} -; C1: jalr ${{[0-9]+}} +; C1: jalrc ${{[0-9]+}} ; SR: restore $ra, [[FS]] ; PE: li $2, 0 -; PE: jr $ra +; PE: jrc $ra } diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll new file mode 100644 index 0000000000..c6da8b1ac9 --- /dev/null +++ b/test/CodeGen/Mips/i32k.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16a +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16b + +@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 + +define i32 @main() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind +; 16a: li ${{[0-9]+}}, 29905 +; 16b: li ${{[0-9]+}}, 16408 + %call1 = tail call 
i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind +; 16a: li ${{[0-9]+}}, 49127 +; 16b: li ${{[0-9]+}}, 35631 + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/Mips/largeimm1.ll b/test/CodeGen/Mips/largeimm1.ll index d65cc025d0..1c0f69c590 100644 --- a/test/CodeGen/Mips/largeimm1.ll +++ b/test/CodeGen/Mips/largeimm1.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=mipsel < %s | FileCheck %s -; CHECK: lui $at, 49152 -; CHECK: lui $at, 16384 +; CHECK: lui ${{[0-9]+}}, 49152 +; CHECK: lui ${{[0-9]+}}, 16384 define void @f() nounwind { entry: %a1 = alloca [1073741824 x i8], align 1 diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll index 2e548790cd..1e96346d1d 100644 --- a/test/CodeGen/Mips/largeimmprinting.ll +++ b/test/CodeGen/Mips/largeimmprinting.ll @@ -1,4 +1,6 @@ -; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \ +; RUN: FileCheck %s -check-prefix=64 %struct.S1 = type { [65536 x i8] } @@ -6,9 +8,21 @@ define void @f() nounwind { entry: -; CHECK: lui $at, 65535 -; CHECK: addiu $at, $at, -16 -; CHECK: addu $sp, $sp, $at +; 32: lui $[[R0:[0-9]+]], 65535 +; 32: addiu $[[R0]], $[[R0]], -24 +; 32: addu $sp, $sp, $[[R0]] +; 32: lui $[[R1:[0-9]+]], 1 +; 32: addu $[[R1]], $sp, $[[R1]] +; 32: sw $ra, 20($[[R1]]) +; 64: daddiu $[[R0:[0-9]+]], $zero, 1 +; 64: dsll $[[R0]], $[[R0]], 48 +; 64: daddiu $[[R0]], $[[R0]], -1 +; 64: dsll $[[R0]], $[[R0]], 16 +; 64: daddiu $[[R0]], $[[R0]], -48 +; 64: daddu $sp, $sp, $[[R0]] +; 64: lui $[[R1:[0-9]+]], 1 +; 64: daddu $[[R1]], $sp, $[[R1]] +; 64: sd $ra, 40($[[R1]]) %agg.tmp = alloca %struct.S1, align 1 %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0 diff --git a/test/CodeGen/Mips/llcarry.ll b/test/CodeGen/Mips/llcarry.ll new file mode 100644 index 0000000000..7763daec3b --- /dev/null +++ b/test/CodeGen/Mips/llcarry.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i64 4294967295, align 8 +@j = global i64 15, align 8 +@ii = global i64 4294967295, align 8 +@k = common global i64 0, align 8 +@l = common global i64 0, align 8 +@m = common global i64 0, align 8 + +define void @test1() nounwind { +entry: + %0 = load i64* @i, align 8 + %1 = load i64* @j, align 8 + %add = add nsw i64 %1, %0 + store i64 %add, i64* @k, align 8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} + ret void +} + +define void @test2() nounwind { +entry: + %0 = load i64* @i, align 8 + %1 = load i64* @j, align 8 + %sub = sub nsw i64 %0, %1 +; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} + store i64 %sub, i64* @l, align 8 + ret void +} + +define void @test3() nounwind { +entry: + %0 = load i64* @ii, align 8 + %add = add nsw i64 %0, 15 +; 16: addiu ${{[0-9]+}}, 15 +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: move ${{[0-9]+}}, $t8 +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} +; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} + store i64 %add, i64* @m, align 8 + ret void +} + + diff --git 
a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll index 873b9f1410..1a4f79c191 100644 --- a/test/CodeGen/Mips/longbranch.ll +++ b/test/CodeGen/Mips/longbranch.ll @@ -6,15 +6,15 @@ define void @foo1(i32 %s) nounwind { entry: ; O32: bal -; O32: lui $at, 0 -; O32: addiu $at, $at, {{[0-9]+}} -; N64: lui $at, 0 -; N64: daddiu $at, $at, 0 -; N64: dsll $at, $at, 16 -; N64: daddiu $at, $at, 0 +; O32: lui $1, 0 +; O32: addiu $1, $1, {{[0-9]+}} +; N64: lui $1, 0 +; N64: daddiu $1, $1, 0 +; N64: dsll $1, $1, 16 +; N64: daddiu $1, $1, 0 ; N64: bal -; N64: dsll $at, $at, 16 -; N64: daddiu $at, $at, {{[0-9]+}} +; N64: dsll $1, $1, 16 +; N64: daddiu $1, $1, {{[0-9]+}} %tobool = icmp eq i32 %s, 0 br i1 %tobool, label %if.end, label %if.then diff --git a/test/CodeGen/Mips/misha.ll b/test/CodeGen/Mips/misha.ll new file mode 100644 index 0000000000..80637edb16 --- /dev/null +++ b/test/CodeGen/Mips/misha.ll @@ -0,0 +1,69 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +define i32 @sumc(i8* nocapture %to, i8* nocapture %from, i32) nounwind { +entry: + %sext = shl i32 %0, 16 + %conv = ashr exact i32 %sext, 16 + %cmp8 = icmp eq i32 %conv, 0 + br i1 %cmp8, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %.pre = load i8* %to, align 1 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %1 = phi i8 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ] + %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %from.addr.09 = phi i8* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i8* %from.addr.09, i32 1 + %2 = load i8* %from.addr.09, align 1 + %conv27 = zext i8 %2 to i32 + %conv36 = zext i8 %1 to i32 + %add = add nsw i32 %conv36, %conv27 + %conv4 = trunc i32 %add to i8 + store i8 %conv4, i8* %to, align 1 + %inc = add nsw i32 %i.010, 1 + %cmp = icmp eq i32 %inc, %conv + br i1 %cmp, label %for.end, label %for.body +; 16: sumc: +; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}}) +; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}}) +; 16: sum: +; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}}) +; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}}) + +for.end: ; preds = %for.body, %entry + ret i32 undef +} + +define i32 @sum(i16* nocapture %to, i16* nocapture %from, i32) nounwind { +entry: + %sext = shl i32 %0, 16 + %conv = ashr exact i32 %sext, 16 + %cmp8 = icmp eq i32 %conv, 0 + br i1 %cmp8, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %.pre = load i16* %to, align 2 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %1 = phi i16 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ] + %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %from.addr.09 = phi i16* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ] + %incdec.ptr = getelementptr inbounds i16* %from.addr.09, i32 1 + %2 = load i16* %from.addr.09, align 2 + %conv27 = zext i16 %2 to i32 + %conv36 = zext i16 %1 to i32 + %add = add nsw i32 %conv36, %conv27 + %conv4 = trunc i32 %add to i16 + store i16 %conv4, i16* %to, align 2 + %inc = add nsw i32 %i.010, 1 + %cmp = icmp eq i32 %inc, %conv + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 undef +} + + diff --git a/test/CodeGen/Mips/null.ll b/test/CodeGen/Mips/null.ll index 7beae99c45..00c66a9928 100644 --- a/test/CodeGen/Mips/null.ll +++ b/test/CodeGen/Mips/null.ll @@ -8,6 +8,6 @@ entry: ; 16: .set mips16 # @main -; 16: jr $ra +; 16: jrc $ra } diff --git a/test/CodeGen/Mips/o32_cc_byval.ll 
b/test/CodeGen/Mips/o32_cc_byval.ll index eac0d80c1c..5558ba6e10 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -119,6 +119,16 @@ entry: ret void } +%struct.S4 = type { [4 x i32] } + +define void @f5(i64 %a0, %struct.S4* nocapture byval %a1) nounwind { +entry: + tail call void @f6(%struct.S4* byval %a1, i64 %a0) nounwind + ret void +} + +declare void @f6(%struct.S4* nocapture byval, i64) + !0 = metadata !{metadata !"int", metadata !1} !1 = metadata !{metadata !"omnipotent char", metadata !2} !2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/Mips/remat-immed-load.ll b/test/CodeGen/Mips/remat-immed-load.ll new file mode 100644 index 0000000000..d93964bcae --- /dev/null +++ b/test/CodeGen/Mips/remat-immed-load.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=64 + +define void @f0() nounwind { +entry: +; 32: addiu $4, $zero, 1 +; 32: addiu $4, $zero, 1 + + tail call void @foo1(i32 1) nounwind + tail call void @foo1(i32 1) nounwind + ret void +} + +declare void @foo1(i32) + +define void @f3() nounwind { +entry: +; 64: daddiu $4, $zero, 1 +; 64: daddiu $4, $zero, 1 + + tail call void @foo2(i64 1) nounwind + tail call void @foo2(i64 1) nounwind + ret void +} + +declare void @foo2(i64) + +define void @f5() nounwind { +entry: +; 32: lui $4, 1 +; 32: lui $4, 1 + + tail call void @f6(i32 65536) nounwind + tail call void @f6(i32 65536) nounwind + ret void +} + +declare void @f6(i32) + +define void @f7() nounwind { +entry: +; 64: lui $4, 1 +; 64: lui $4, 1 + + tail call void @f8(i64 65536) nounwind + tail call void @f8(i64 65536) nounwind + ret void +} + +declare void @f8(i64) + diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll new file mode 100644 index 0000000000..cda0c96ef4 --- /dev/null +++ b/test/CodeGen/Mips/selpat.ll @@ -0,0 +1,350 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@t = global i32 10, align 4 +@f = global i32 199, align 4 +@a = global i32 1, align 4 +@b = global i32 10, align 4 +@c = global i32 1, align 4 +@z1 = common global i32 0, align 4 +@z2 = common global i32 0, align 4 +@z3 = common global i32 0, align 4 +@z4 = common global i32 0, align 4 + +define void @calc_seleq() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp eq i32 %0, %1 + %2 = load i32* @f, align 4 + %3 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + store i32 %cond, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp eq i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %3, i32 %2 + store i32 %cond10, i32* @z3, align 4 + store i32 %cond10, i32* @z4, align 4 + ret void +} + + +define void @calc_seleqk() nounwind { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp eq i32 %0, 1 + %1 = load i32* @t, align 4 + %2 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: cmpi ${{[0-9]+}}, 1 +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp eq i32 %0, 10 + %cond5 = select i1 %cmp1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %3 = load i32* @b, align 4 + %cmp6 = icmp eq i32 %3, 3 + %cond10 = select i1 %cmp6, i32 %2, i32 %1 + store i32 %cond10, i32* @z3, align 4 +; 16: cmpi ${{[0-9]+}}, 
10 +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp11 = icmp eq i32 %3, 10 + %cond15 = select i1 %cmp11, i32 %1, i32 %2 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define void @calc_seleqz() nounwind { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp eq i32 %0, 0 + %1 = load i32* @t, align 4 + %2 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: beqz ${{[0-9]+}}, .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %3 = load i32* @b, align 4 + %cmp1 = icmp eq i32 %3, 0 + %cond5 = select i1 %cmp1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp eq i32 %4, 0 + %cond10 = select i1 %cmp6, i32 %1, i32 %2 + store i32 %cond10, i32* @z3, align 4 + store i32 %cond, i32* @z4, align 4 + ret void +} + +define void @calc_selge() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp sge i32 %0, %1 + %2 = load i32* @f, align 4 + %3 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp sge i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp sge i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %3, i32 %2 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp sge i32 %0, %4 + %cond15 = select i1 %cmp11, i32 %3, i32 %2 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define i32 @calc_selgt() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp sgt i32 %0, %1 +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %2 = load i32* @f, align 4 + %3 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 + %cmp1 = icmp sgt i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp sgt i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %2, i32 %3 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp sgt i32 %0, %4 + %cond15 = select i1 %cmp11, i32 %2, i32 %3 + store i32 %cond15, i32* @z4, align 4 + ret i32 undef +} + +define void @calc_selle() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp sle i32 %0, %1 + %2 = load i32* @t, align 4 + %3 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: slt ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp sle i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp sle i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %2, i32 %3 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp sle i32 %0, %4 + %cond15 = select i1 %cmp11, i32 %2, i32 %3 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define void @calc_selltk() nounwind { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp slt i32 %0, 10 + %1 = load i32* @t, align 4 + %2 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: slti ${{[0-9]+}}, {{[0-9]+}} +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %3 = load i32* @b, align 4 + %cmp1 = icmp slt i32 %3, 2 + %cond5 = select i1 %cmp1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 
+ %cmp6 = icmp sgt i32 %4, 2 + %cond10 = select i1 %cmp6, i32 %2, i32 %1 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp sgt i32 %0, 2 + %cond15 = select i1 %cmp11, i32 %2, i32 %1 + store i32 %cond15, i32* @z4, align 4 + ret void +} + + +define void @calc_selne() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp ne i32 %0, %1 + %2 = load i32* @t, align 4 + %3 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: cmp ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + store i32 %cond, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp ne i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %3, i32 %2 + store i32 %cond10, i32* @z3, align 4 + store i32 %cond10, i32* @z4, align 4 + ret void +} + +define void @calc_selnek() nounwind { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp ne i32 %0, 1 + %1 = load i32* @f, align 4 + %2 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: cmpi ${{[0-9]+}}, 1 +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp ne i32 %0, 10 + %cond5 = select i1 %cmp1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %3 = load i32* @b, align 4 + %cmp6 = icmp ne i32 %3, 3 + %cond10 = select i1 %cmp6, i32 %2, i32 %1 + store i32 %cond10, i32* @z3, align 4 +; 16: cmpi ${{[0-9]+}}, 10 +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp11 = icmp ne i32 %3, 10 + %cond15 = select i1 %cmp11, i32 %1, i32 %2 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define void @calc_selnez() nounwind { +entry: + %0 = load i32* @a, align 4 + %cmp = icmp ne i32 %0, 0 + %1 = load i32* @f, align 4 + %2 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: bnez ${{[0-9]+}}, .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %3 = load i32* @b, align 4 + %cmp1 = icmp ne i32 %3, 0 + %cond5 = select i1 %cmp1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp ne i32 %4, 0 + %cond10 = select i1 %cmp6, i32 %1, i32 %2 + store i32 %cond10, i32* @z3, align 4 + store i32 %cond, i32* @z4, align 4 + ret void +} + +define void @calc_selnez2() nounwind { +entry: + %0 = load i32* @a, align 4 + %tobool = icmp ne i32 %0, 0 + %1 = load i32* @f, align 4 + %2 = load i32* @t, align 4 + %cond = select i1 %tobool, i32 %1, i32 %2 + store i32 %cond, i32* @z1, align 4 +; 16: bnez ${{[0-9]+}}, .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %3 = load i32* @b, align 4 + %tobool1 = icmp ne i32 %3, 0 + %cond5 = select i1 %tobool1, i32 %2, i32 %1 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %tobool6 = icmp ne i32 %4, 0 + %cond10 = select i1 %tobool6, i32 %1, i32 %2 + store i32 %cond10, i32* @z3, align 4 + store i32 %cond, i32* @z4, align 4 + ret void +} + +define void @calc_seluge() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp uge i32 %0, %1 + %2 = load i32* @f, align 4 + %3 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp uge i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp uge i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %3, i32 %2 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp uge i32 %0, %4 + 
%cond15 = select i1 %cmp11, i32 %3, i32 %2 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define void @calc_selugt() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp ugt i32 %0, %1 + %2 = load i32* @f, align 4 + %3 = load i32* @t, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: btnez .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp ugt i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp ugt i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %2, i32 %3 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp ugt i32 %0, %4 + %cond15 = select i1 %cmp11, i32 %2, i32 %3 + store i32 %cond15, i32* @z4, align 4 + ret void +} + +define void @calc_selule() nounwind { +entry: + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %cmp = icmp ule i32 %0, %1 + %2 = load i32* @t, align 4 + %3 = load i32* @f, align 4 + %cond = select i1 %cmp, i32 %2, i32 %3 + store i32 %cond, i32* @z1, align 4 +; 16: sltu ${{[0-9]+}}, ${{[0-9]+}} +; 16: bteqz .+4 +; 16: move ${{[0-9]+}}, ${{[0-9]+}} + %cmp1 = icmp ule i32 %1, %0 + %cond5 = select i1 %cmp1, i32 %3, i32 %2 + store i32 %cond5, i32* @z2, align 4 + %4 = load i32* @c, align 4 + %cmp6 = icmp ule i32 %4, %0 + %cond10 = select i1 %cmp6, i32 %2, i32 %3 + store i32 %cond10, i32* @z3, align 4 + %cmp11 = icmp ule i32 %0, %4 + %cond15 = select i1 %cmp11, i32 %2, i32 %3 + store i32 %cond15, i32* @z4, align 4 + ret void +} diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll index 40aaa7c030..b6bae09bcb 100644 --- a/test/CodeGen/Mips/setgek.ll +++ b/test/CodeGen/Mips/setgek.ll @@ -12,7 +12,7 @@ entry: %conv = zext i1 %cmp to i32 store i32 %conv, i32* @r1, align 4 ; 16: slti ${{[0-9]+}}, -32768 -; 16: move $[[REGISTER:[0-9]+]], $t8 -; 16: xor ${{[0-9]+}}, $[[REGISTER]] +; 16: move ${{[0-9]+}}, $t8 +; 16: xor ${{[0-9]+}}, ${{[0-9]+}} ret void } diff --git a/test/CodeGen/Mips/stchar.ll b/test/CodeGen/Mips/stchar.ll new file mode 100644 index 0000000000..c00c9fd9d2 --- /dev/null +++ b/test/CodeGen/Mips/stchar.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_h +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_b + +@.str = private unnamed_addr constant [9 x i8] c"%hd %c \0A\00", align 1 +@sp = common global i16* null, align 4 +@cp = common global i8* null, align 4 + +define void @p1(i16 signext %s, i8 signext %c) nounwind { +entry: + %conv = sext i16 %s to i32 + %conv1 = sext i8 %c to i32 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +define void @p2() nounwind { +entry: + %0 = load i16** @sp, align 4 + %1 = load i16* %0, align 2 + %2 = load i8** @cp, align 4 + %3 = load i8* %2, align 1 + %conv.i = sext i16 %1 to i32 + %conv1.i = sext i8 %3 to i32 + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind + %4 = load i16** @sp, align 4 + store i16 32, i16* %4, align 2 + %5 = load i8** @cp, align 4 + store i8 97, i8* %5, align 1 + ret void +} + +define void @test() nounwind { +entry: + %s = alloca i16, align 4 + %c = alloca i8, align 4 + store i16 16, i16* %s, align 4 + store i8 99, i8* %c, align 4 + store i16* %s, i16** @sp, align 4 + store i8* %c, i8** @cp, align 4 + %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind + %0 = load i16** @sp, align 4 + store i16 32, i16* %0, align 2 + %1 = load i8** @cp, align 4 + store i8 97, i8* %1, align 1 + %2 = load i16* %s, align 4 + %3 = load i8* %c, align 4 + %conv.i = sext i16 %2 to i32 + %conv1.i = sext i8 %3 to i32 + %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind + ret void +; 16_b: test: +; 16_h: test: +; 16_b: sb ${{[0-9]+}}, [[offset1:[0-9]+]](${{[0-9]+}}) +; 16_b: lb ${{[0-9]+}}, [[offset1]](${{[0-9]+}}) +; 16_h: sh ${{[0-9]+}}, [[offset2:[0-9]+]](${{[0-9]+}}) +; 16_h: lh ${{[0-9]+}}, [[offset2]](${{[0-9]+}}) +} + +define i32 @main() nounwind { +entry: + %s.i = alloca i16, align 4 + %c.i = alloca i8, align 4 + %0 = bitcast i16* %s.i to i8* + call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind + call void @llvm.lifetime.start(i64 -1, i8* %c.i) nounwind + store i16 16, i16* %s.i, align 4 + store i8 99, i8* %c.i, align 4 + store i16* %s.i, i16** @sp, align 4 + store i8* %c.i, i8** @cp, align 4 + %call.i.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind + %1 = load i16** @sp, align 4 + store i16 32, i16* %1, align 2 + %2 = load i8** @cp, align 4 + store i8 97, i8* %2, align 1 + %3 = load i16* %s.i, align 4 + %4 = load i8* %c.i, align 4 + %conv.i.i = sext i16 %3 to i32 + %conv1.i.i = sext i8 %4 to i32 + %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind + call void @llvm.lifetime.end(i64 -1, i8* %c.i) nounwind + ret i32 0 +} + +declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind + +declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll index 4989636a20..bcd33fca70 100644 --- a/test/CodeGen/Mips/tailcall.ll +++ b/test/CodeGen/Mips/tailcall.ll @@ -4,6 +4,8 @@ ; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \ ; RUN: < %s | FileCheck %s -check-prefix=N64 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic \ +; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=PIC16 @g0 = common global i32 0, align 4 @g1 = common global i32 0, align 4 @@ -21,6 +23,7 @@ entry: ; PIC32-NOT: jalr ; STATIC32-NOT: jal ; N64-NOT: jalr +; PIC16: jalrc %call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind ret i32 %call @@ -33,6 +36,7 @@ entry: ; PIC32: jalr ; STATIC32: jal ; N64-NOT: jalr +; PIC16: jalrc 
%call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind ret i32 %call @@ -45,6 +49,7 @@ entry: ; PIC32: jalr ; STATIC32: jal ; N64-NOT: jalr +; PIC16: jalrc %call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind ret i32 %call @@ -57,6 +62,7 @@ entry: ; PIC32: jalr ; STATIC32: jal ; N64: jalr +; PIC16: jalrc %call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind ret i32 %call @@ -66,9 +72,18 @@ declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32) define i32 @caller5() nounwind readonly { entry: +; PIC32: .ent caller5 ; PIC32-NOT: jalr +; PIC32: .end caller5 +; STATIC32: .ent caller5 ; STATIC32-NOT: jal +; STATIC32: .end caller5 +; N64: .ent caller5 ; N64-NOT: jalr +; N64: .end caller5 +; PIC16: .ent caller5 +; PIC16: jalrc +; PIC16: .end caller5 %0 = load i32* @g0, align 4 %1 = load i32* @g1, align 4 @@ -98,3 +113,133 @@ entry: ret i32 %add8 } +declare i32 @callee8(i32, ...) + +define i32 @caller8_0() nounwind { +entry: + %call = tail call fastcc i32 @caller8_1() + ret i32 %call +} + +define internal fastcc i32 @caller8_1() nounwind noinline { +entry: +; PIC32: .ent caller8_1 +; PIC32: jalr +; PIC32: .end caller8_1 +; STATIC32: .ent caller8_1 +; STATIC32: jal +; STATIC32: .end caller8_1 +; N64: .ent caller8_1 +; N64-NOT: jalr +; N64: .end caller8_1 +; PIC16: .ent caller8_1 +; PIC16: jalrc +; PIC16: .end caller8_1 + + %call = tail call i32 (i32, ...)* @callee8(i32 2, i32 1) nounwind + ret i32 %call +} + +%struct.S = type { [2 x i32] } + +@gs1 = external global %struct.S + +declare i32 @callee9(%struct.S* byval) + +define i32 @caller9_0() nounwind { +entry: + %call = tail call fastcc i32 @caller9_1() + ret i32 %call +} + +define internal fastcc i32 @caller9_1() nounwind noinline { +entry: +; PIC32: .ent caller9_1 +; PIC32: jalr +; PIC32: .end caller9_1 +; STATIC32: .ent caller9_1 +; STATIC32: jal +; STATIC32: .end caller9_1 +; N64: .ent caller9_1 +; N64: jalr +; N64: .end caller9_1 +; PIC16: .ent caller9_1 +; PIC16: jalrc +; PIC16: .end caller9_1 + + %call = tail call i32 @callee9(%struct.S* byval @gs1) nounwind + ret i32 %call +} + +declare i32 @callee10(i32, i32, i32, i32, i32, i32, i32, i32, i32) + +define i32 @caller10(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) nounwind { +entry: +; PIC32: .ent caller10 +; PIC32-NOT: jalr +; STATIC32: .ent caller10 +; STATIC32-NOT: jal +; N64: .ent caller10 +; N64-NOT: jalr +; PIC16: .ent caller10 +; PIC16: jalrc + + %call = tail call i32 @callee10(i32 %a8, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind + ret i32 %call +} + +declare i32 @callee11(%struct.S* byval) + +define i32 @caller11() nounwind noinline { +entry: +; PIC32: .ent caller11 +; PIC32: jalr +; STATIC32: .ent caller11 +; STATIC32: jal +; N64: .ent caller11 +; N64: jalr +; PIC16: .ent caller11 +; PIC16: jalrc + + %call = tail call i32 @callee11(%struct.S* byval @gs1) nounwind + ret i32 %call +} + +declare i32 @callee12() + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define i32 @caller12(%struct.S* nocapture byval %a0) nounwind { +entry: +; PIC32: .ent caller12 +; PIC32: jalr +; STATIC32: .ent caller12 +; STATIC32: jal +; N64: .ent caller12 +; N64: jalr +; PIC16: .ent caller12 +; PIC16: jalrc + + %0 = bitcast %struct.S* %a0 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast (%struct.S* @gs1 to i8*), i8* %0, 
i32 8, i32 4, i1 false) + %call = tail call i32 @callee12() nounwind + ret i32 %call +} + +declare i32 @callee13(i32, ...) + +define i32 @caller13() nounwind { +entry: +; PIC32: .ent caller13 +; PIC32-NOT: jalr +; STATIC32: .ent caller13 +; STATIC32-NOT: jal +; N64: .ent caller13 +; N64-NOT: jalr +; PIC16: .ent caller13 +; PIC16: jalrc + + %call = tail call i32 (i32, ...)* @callee13(i32 1, i32 2) nounwind + ret i32 %call +} + diff --git a/test/CodeGen/Mips/tls16.ll b/test/CodeGen/Mips/tls16.ll new file mode 100644 index 0000000000..861864bcfe --- /dev/null +++ b/test/CodeGen/Mips/tls16.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16 + +@a = thread_local global i32 4, align 4 + +define i32 @foo() nounwind readonly { +entry: + %0 = load i32* @a, align 4 +; PIC16: lw ${{[0-9]+}}, %call16(__tls_get_addr)(${{[0-9]+}}) +; PIC16: addiu ${{[0-9]+}}, %tlsgd(a) + ret i32 %0 +} + + diff --git a/test/CodeGen/Mips/tls16_2.ll b/test/CodeGen/Mips/tls16_2.ll new file mode 100644 index 0000000000..b33e3c3766 --- /dev/null +++ b/test/CodeGen/Mips/tls16_2.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16 + +@f.i = internal thread_local unnamed_addr global i32 1, align 4 + +define i8* @f(i8* nocapture %a) nounwind { +entry: + %0 = load i32* @f.i, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @f.i, align 4 + %1 = inttoptr i32 %inc to i8* +; PIC16: addiu ${{[0-9]+}}, %tlsldm(f.i) + ret i8* %1 +} + + diff --git a/test/CodeGen/PowerPC/emptystruct.ll b/test/CodeGen/PowerPC/emptystruct.ll new file mode 100644 index 0000000000..36b4abd2bf --- /dev/null +++ b/test/CodeGen/PowerPC/emptystruct.ll @@ -0,0 +1,51 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; This tests correct handling of empty aggregate parameters and return values. +; An empty parameter passed by value does not consume a protocol register or +; a parameter save area doubleword. An empty parameter passed by reference +; is treated as any other pointer parameter. An empty aggregate return value +; is treated as any other aggregate return value, passed via address as a +; hidden parameter in GPR3. In this example, GPR3 contains the return value +; address, GPR4 contains the address of e2, and e1 and e3 are not passed or +; received. 
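+; +; A rough C analogue of the callee under test (hypothetical, for illustration +; only): +; +; struct empty {}; +; struct empty callee(struct empty a1, struct empty *a2, struct empty a3); +; +; Only the hidden sret pointer (GPR3) and the pointer a2 (GPR4) end up +; occupying registers; a1 and a3 occupy neither registers nor save-area slots.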
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.empty = type {} + +define void @callee(%struct.empty* noalias sret %agg.result, %struct.empty* byval %a1, %struct.empty* %a2, %struct.empty* byval %a3) nounwind { +entry: + %a2.addr = alloca %struct.empty*, align 8 + store %struct.empty* %a2, %struct.empty** %a2.addr, align 8 + %0 = load %struct.empty** %a2.addr, align 8 + %1 = bitcast %struct.empty* %agg.result to i8* + %2 = bitcast %struct.empty* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false) + ret void +} + +; CHECK: callee: +; CHECK: std 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: blr + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +define void @caller(%struct.empty* noalias sret %agg.result) nounwind { +entry: + %e1 = alloca %struct.empty, align 1 + %e2 = alloca %struct.empty, align 1 + %e3 = alloca %struct.empty, align 1 + call void @callee(%struct.empty* sret %agg.result, %struct.empty* byval %e1, %struct.empty* %e2, %struct.empty* byval %e3) + ret void +} + +; CHECK: caller: +; CHECK: addi 4, +; CHECK: std 3, +; CHECK-NOT: std 5, +; CHECK-NOT: std 6, +; CHECK: bl callee diff --git a/test/CodeGen/PowerPC/int-fp-conv-1.ll b/test/CodeGen/PowerPC/int-fp-conv-1.ll index 6c82723519..d2887b9b94 100644 --- a/test/CodeGen/PowerPC/int-fp-conv-1.ll +++ b/test/CodeGen/PowerPC/int-fp-conv-1.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=ppc64 | grep __floatditf +; RUN: llc < %s -march=ppc64 | FileCheck %s +; CHECK-NOT: __floatditf define i64 @__fixunstfdi(ppc_fp128 %a) nounwind { entry: diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll new file mode 100644 index 0000000000..10b70d02e5 --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -0,0 +1,26 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +; Verify internal alignment of long double in a struct. The double +; argument comes in GPR3; GPR4 is skipped; GPRs 5 and 6 contain +; the long double. Check that these are stored to proper locations +; in the parameter save area and loaded from there for return in FPR1/2.
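+; +; A rough C analogue of the test below (hypothetical, for illustration only): +; +; struct S { double a; long double b; }; +; long double test(struct S x) { return x.b; } +; +; The 16-byte alignment of the ppc_fp128 member is what forces GPR4 to be +; skipped, giving the GPR3 / GPR5-GPR6 assignment checked below.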
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S = type { double, ppc_fp128 } + +define ppc_fp128 @test(%struct.S* byval %x) nounwind { +entry: + %b = getelementptr inbounds %struct.S* %x, i32 0, i32 1 + %0 = load ppc_fp128* %b, align 16 + ret ppc_fp128 %0 +} + +; CHECK: std 6, 72(1) +; CHECK: std 5, 64(1) +; CHECK: std 4, 56(1) +; CHECK: std 3, 48(1) +; CHECK: lfd 1, 64(1) +; CHECK: lfd 2, 72(1) + diff --git a/test/CodeGen/PowerPC/pr12757.ll b/test/CodeGen/PowerPC/pr12757.ll new file mode 100644 index 0000000000..c344656d29 --- /dev/null +++ b/test/CodeGen/PowerPC/pr12757.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32 @__flt_rounds() nounwind { +entry: + %0 = tail call i64 asm sideeffect "mffs $0", "=f"() nounwind + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK: @__flt_rounds +; CHECK: mffs + diff --git a/test/CodeGen/PowerPC/varargs-struct-float.ll b/test/CodeGen/PowerPC/varargs-struct-float.ll new file mode 100644 index 0000000000..fb1835f580 --- /dev/null +++ b/test/CodeGen/PowerPC/varargs-struct-float.ll @@ -0,0 +1,23 @@ +; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.Sf1 = type { float } + +define void @foo(float inreg %s.coerce) nounwind { +entry: + %s = alloca %struct.Sf1, align 4 + %coerce.dive = getelementptr %struct.Sf1* %s, i32 0, i32 0 + store float %s.coerce, float* %coerce.dive, align 1 + %coerce.dive1 = getelementptr %struct.Sf1* %s, i32 0, i32 0 + %0 = load float* %coerce.dive1, align 1 + call void (i32, ...)* @testvaSf1(i32 1, float inreg %0) + ret void +} + +; CHECK: stfs {{[0-9]+}}, 60(1) +; CHECK: ld 4, 56(1) +; CHECK: bl + +declare void @testvaSf1(i32, ...) diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index b2b59db8f1..3180f464d1 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -1,6 +1,9 @@ -; RUN: llc -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s -; Check vector comparisons using altivec. +; Check vector comparisons using altivec. For non-native types, only a basic +; comparison instruction check is done. For the altivec-supported types (16i8, +; 8i16, 4i32, and 4f32) all the comparison operators (==, !=, >, >=, <, <=) +; are checked.
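+; +; Note on the expected lowerings below (a restatement of the CHECK patterns, +; not an extra requirement): altivec provides only equality and greater-than +; compares, so != is a vcmpequ* followed by vnor, and <= and >= combine an +; equality compare and a greater-than compare with vor, e.g. +; (x <= y) lowers as (x == y) | (y > x).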
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" @@ -33,13 +36,105 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <16 x i8> @v16si8_cmp(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +; Adicional tests for v16i8 since it is a altivec native type + +define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y %sext = sext <16 x i1> %cmp to <16 x i8> ret <16 x i8> %sext } -; CHECK: v16si8_cmp: -; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v16si8_cmp_eq: +; CHECK: vcmpequb 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ne <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ne: +; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3 +; CHECK-NOR: vnor 2, [[RET]], [[RET]] + +define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sle <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ule <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp slt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_lt: +; CHECK: vcmpgtsb 2, 3, 2 + +define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ult <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_lt: +; CHECK: vcmpgtub 2, 3, 2 + +define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sgt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_gt: +; CHECK: vcmpgtsb 2, 2, 3 + +define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ugt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_gt: +; CHECK: vcmpgtub 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sge <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp uge <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone { @@ -70,13 +165,106 @@ define <4 x i16> @v4si16_cmp(<4 x i16> 
%x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <8 x i16> @v8si16_cmp(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +; Additional tests for v8i16 since it is an altivec native type + +define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: %cmp = icmp eq <8 x i16> %x, %y %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext } -; CHECK: v8si16_cmp: -; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v8si16_cmp_eq: +; CHECK: vcmpequh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ne <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ne: +; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sle <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ule <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp slt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_lt: +; CHECK: vcmpgtsh 2, 3, 2 + +define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ult <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_lt: +; CHECK: vcmpgtuh 2, 3, 2 + +define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sgt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_gt: +; CHECK: vcmpgtsh 2, 2, 3 + +define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ugt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_gt: +; CHECK: vcmpgtuh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp uge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone { @@ -110,13 +298,106 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x i32> @v4si32_cmp(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +; 
Additional tests for v4si32 since it is an altivec native type + +define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: %cmp = icmp eq <4 x i32> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext } -; CHECK: v4si32_cmp: -; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4si32_cmp_eq: +; CHECK: vcmpequw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ne <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ne: +; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]] + +define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sle <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ule <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp slt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_lt: +; CHECK: vcmpgtsw 2, 3, 2 + +define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ult <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_lt: +; CHECK: vcmpgtuw 2, 3, 2 + +define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sgt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_gt: +; CHECK: vcmpgtsw 2, 2, 3 + +define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ugt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_gt: +; CHECK: vcmpgtuw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp uge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone { @@ -168,15 +449,70 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x float> @v4f32_cmp(<4 x float> %x, <4 x float> %y) nounwind readnone { +; Additional tests for v4f32 since it is an altivec native type + +define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: %cmp = fcmp oeq <4 x float> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> %0 = 
bitcast <4 x i32> %sext to <4 x float> ret <4 x float> %0 } -; CHECK: v4f32_cmp: -; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4f32_cmp_eq: +; CHECK: vcmpeqfp 2, 2, 3 + +define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp une <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ne: +; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ole <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_le: +; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp olt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_lt: +; CHECK: vcmpgtfp 2, 3, 2 + +define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ge: +; CHECK: vcmpgefp 2, 2, 3 + +define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ogt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_gt: +; CHECK: vcmpgtfp 2, 2, 3 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone { diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll new file mode 100644 index 0000000000..201c15b9c7 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_extload.ll @@ -0,0 +1,155 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s + +; Check vector extend load expansion with altivec enabled. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Altivec does not provide an sext instruction, so the sext is expanded into +; a sequence of vector stores (stvx), byte load/sign-extend/store operations +; (lbz/stb), and a final vector load (lvx) that reloads the resulting +; extended vector.
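+; +; Assumed shape of that expansion, matching the CHECK lines below: the input +; vector is spilled with stvx, each element is reloaded with lbz, sign +; extended, and stored back with stb, and a final lvx reloads the fully +; extended vector; hence one lbz/stb pair per vector element.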
+define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { + %b = trunc <16 x i8> %a to <16 x i4> + %c = sext <16 x i4> %b to <16 x i8> + ret <16 x i8> %c +} +; CHECK: v16si8_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; The zero extend uses cleverer logic: a vector splat +; and a logical AND to set the higher bits to 0. +define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) { + %b = trunc <16 x i8> %a to <16 x i4> + %c = zext <16 x i4> %b to <16 x i8> + ret <16 x i8> %c +} +; CHECK: v16si8_zext_in_reg: +; CHECK: vspltisb [[VMASK:[0-9]+]], 15 +; CHECK-NEXT: vand 2, 2, [[VMASK]] + +; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth). +define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { + %b = trunc <8 x i16> %a to <8 x i8> + %c = sext <8 x i8> %b to <8 x i16> + ret <8 x i16> %c +} +; CHECK: v8si16_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; Same as v8si16_sext_in_reg, but instead of creating the mask +; with a splat, loads it from memory. +define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) { + %b = trunc <8 x i16> %a to <8 x i8> + %c = zext <8 x i8> %b to <8 x i16> + ret <8 x i16> %c +} +; CHECK: v8si16_zext_in_reg: +; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2) +; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]] +; CHECK-NEXT: vand 2, 2, [[VMASK]] + +; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and +; word stores (stw).
+define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { + %b = trunc <4 x i32> %a to <4 x i16> + %c = sext <4 x i16> %b to <4 x i32> + ret <4 x i32> %c +} +; CHECK: v4si32_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; Same as v8si16_sext_in_reg. +define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { + %b = trunc <4 x i32> %a to <4 x i16> + %c = zext <4 x i16> %b to <4 x i32> + ret <4 x i32> %c +} +; CHECK: v4si32_zext_in_reg: +; CHECK: vspltisw [[VMASK:[0-9]+]], -16 +; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]] +; CHECK-NEXT: vand 2, 2, [[VMASK]] diff --git a/test/CodeGen/PowerPC/vec_sqrt.ll b/test/CodeGen/PowerPC/vec_sqrt.ll new file mode 100644 index 0000000000..055da1a229 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_sqrt.ll @@ -0,0 +1,71 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec,+fsqrt < %s | FileCheck %s + +; Check that vector sqrt is expanded into scalar floating-point sqrts, since +; altivec does not provide an fsqrt instruction for vectors. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %val) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %val) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float> %val) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %val) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double> %val) + +define <2 x float> @v2f32_sqrt(<2 x float> %x) nounwind readnone { +entry: + %sqrt = call <2 x float> @llvm.sqrt.v2f32 (<2 x float> %x) + ret <2 x float> %sqrt +} +; sqrt (<2 x float>) is promoted to sqrt (<4 x float>) +; CHECK: v2f32_sqrt: +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} + +define <4 x float> @v4f32_sqrt(<4 x float> %x) nounwind readnone { +entry: + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %x) + ret <4 x float> %sqrt +} +; CHECK: v4f32_sqrt: +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} + +define <8 x float> @v8f32_sqrt(<8 x float> %x) nounwind readnone { +entry: + %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %x) + ret <8 x float> %sqrt +} +; CHECK: v8f32_sqrt: +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrts {{[0-9]+}}, {{[0-9]+}} + +define <2 x double> @v2f64_sqrt(<2 x double> %x) nounwind readnone { +entry: + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %x) + ret <2 x double> %sqrt +} +; CHECK: v2f64_sqrt: +; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}} + +define <4 x double> @v4f64_sqrt(<4 x double> %x) nounwind readnone { +entry: + %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %x) + ret <4 x double> %sqrt +} +; CHECK: v4f64_sqrt: +; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrt {{[0-9]+}}, {{[0-9]+}} +; CHECK: fsqrt 
{{[0-9]+}}, {{[0-9]+}} diff --git a/test/CodeGen/SPARC/load_to_switch.ll b/test/CodeGen/SPARC/load_to_switch.ll deleted file mode 100644 index 8d62de527e..0000000000 --- a/test/CodeGen/SPARC/load_to_switch.ll +++ /dev/null @@ -1,84 +0,0 @@ -; RUN: llc -march=sparc < %s | FileCheck %s - -; Check that all the switches turned into lookup tables by SimplifyCFG are -; turned back into switches for targets that don't like lookup tables. - -@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1 -@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1 -@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1 -@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1 -@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1 -@switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] -@switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05" -@switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] -@switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] - -define i32 @f(i32 %c) { -entry: - %switch.tableidx = sub i32 %c, 42 - %0 = icmp ult i32 %switch.tableidx, 7 - br i1 %0, label %switch.lookup, label %return - -switch.lookup: - %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx - %switch.load = load i32* %switch.gep - ret i32 %switch.load - -return: - ret i32 15 - -; CHECK: f: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table) -} - -declare void @dummy(i8 signext, float) - -define void @h(i32 %x) { -entry: - %switch.tableidx = sub i32 %x, 0 - %0 = icmp ult i32 %switch.tableidx, 4 - br i1 %0, label %switch.lookup, label %sw.epilog - -switch.lookup: - %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx - %switch.load = load i8* %switch.gep - %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx - %switch.load2 = load float* %switch.gep1 - br label %sw.epilog - -sw.epilog: - %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] - %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] - call void @dummy(i8 signext %a.0, float %b.0) - ret void - -; CHECK: h: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}}) -; CHECK-NOT: sethi %hi(.Lswitch.table{{[0-9]}}) -} - -define i8* @foostring(i32 %x) { -entry: - %switch.tableidx = sub i32 %x, 0 - %0 = icmp ult i32 %switch.tableidx, 4 - br i1 %0, label %switch.lookup, label %return - -switch.lookup: - %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx - %switch.load = load i8** %switch.gep - ret i8* %switch.load - -return: - ret i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0) - -; CHECK: foostring: -; CHECK: %switch.lookup -; CHECK-NOT: sethi %hi(.Lswitch.table3) -} - -; CHECK-NOT: .Lswitch.table -; CHECK-NOT: .Lswitch.table1 -; CHECK-NOT: .Lswitch.table2 -; CHECK-NOT: .Lswitch.table3 diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll index 35914b1679..2074f98cb6 100644 --- a/test/CodeGen/Thumb2/thumb2-uxtb.ll +++ 
b/test/CodeGen/Thumb2/thumb2-uxtb.ll @@ -128,9 +128,9 @@ define i32 @test10(i32 %p0) { ; ARMv7M: test10 ; ARMv7M: mov.w r1, #16253176 -; ARMv7M: mov.w r2, #458759 ; ARMv7M: and.w r0, r1, r0, lsr #7 -; ARMv7M: and.w r1, r2, r0, lsr #5 +; ARMv7M: mov.w r1, #458759 +; ARMv7M: and.w r1, r1, r0, lsr #5 ; ARMv7M: orrs r0, r1 %tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1] %tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2] diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll index a4abccba7e..4e30f2b05a 100644 --- a/test/CodeGen/X86/add-of-carry.ll +++ b/test/CodeGen/X86/add-of-carry.ll @@ -30,4 +30,17 @@ entry: ret i32 %z.0 } +; <rdar://problem/12579915> +define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp { +entry: + %cmp = icmp ugt i32 %x, %y + %dec = sext i1 %cmp to i32 + %dec.res = add nsw i32 %dec, %res + ret i32 %dec.res +; CHECK: test3: +; CHECK: cmpl +; CHECK: sbbl +; CHECK: ret +} + declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone diff --git a/test/CodeGen/X86/atom-shuf.ll b/test/CodeGen/X86/atom-shuf.ll new file mode 100644 index 0000000000..4c3f2f67c5 --- /dev/null +++ b/test/CodeGen/X86/atom-shuf.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=atom | FileCheck %s + +define <16 x i8> @foo(<16 x i8> %in) { + %r = shufflevector <16 x i8> %in, <16 x i8> undef, <16 x i32> < i32 7, i32 3, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + ret <16 x i8> %r +; CHECK: foo +; CHECK: pshufb +; CHECK-NEXT: ret +} diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index 3eb7b37ee6..276d0db9a4 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -580,3 +580,12 @@ bb28: ; preds = %bb21 bb29: ; preds = %bb28, %bb26, %bb25, %bb21 unreachable } + +define void @pr14194() nounwind uwtable { + %tmp = load i64* undef, align 16 + %tmp1 = trunc i64 %tmp to i32 + %tmp2 = lshr i64 %tmp, 32 + %tmp3 = trunc i64 %tmp2 to i32 + %tmp4 = call { i32, i32 } asm sideeffect "", "=&r,=&r,r,r,0,1,~{dirflag},~{fpsr},~{flags}"(i32 %tmp3, i32 undef, i32 %tmp3, i32 %tmp1) nounwind + ret void +} diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll index 091f0de930..d70aa7d79f 100644 --- a/test/CodeGen/X86/fp-fast.ll +++ b/test/CodeGen/X86/fp-fast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 -mattr=-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s ; CHECK: test1 define float @test1(float %a) { @@ -35,3 +35,23 @@ define float @test3(float %a) { ret float %r } +; CHECK: test4 +define float @test4(float %a) { +; CHECK-NOT: fma +; CHECK-NOT: mul +; CHECK-NOT: add +; CHECK: ret + %t1 = fmul float %a, 0.0 + %t2 = fadd float %a, %t1 + ret float %t2 +} + +; CHECK: test5 +define float @test5(float %a) { +; CHECK-NOT: add +; CHECK: vxorps +; CHECK: ret + %t1 = fsub float -0.0, %a + %t2 = fadd float %a, %t1 + ret float %t2 +} diff --git a/test/CodeGen/X86/inlineasm-sched-bug.ll b/test/CodeGen/X86/inlineasm-sched-bug.ll new file mode 100644 index 0000000000..08de0c02d2 --- /dev/null +++ b/test/CodeGen/X86/inlineasm-sched-bug.ll @@ -0,0 +1,13 @@ +; PR13504 +; RUN: llc -march=x86 -mcpu=atom <%s | FileCheck %s +; CHECK: bsfl +; CHECK-NOT: movl + +define i32 @foo(i32 %treemap) nounwind uwtable { +entry: + %sub = sub i32 0, %treemap + %and = and i32 %treemap, %sub 
+ %0 = tail call i32 asm "bsfl $1,$0\0A\09", "=r,rm,~{dirflag},~{fpsr},~{flags}"(i32 %and) nounwind + ret i32 %0 +} + diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 78d9e06f59..0e34222b94 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -219,7 +219,6 @@ entry: ; by sbb, we should not optimize cmp away. define i32 @q(i32 %j.4, i32 %w, i32 %el) { ; CHECK: q: -; CHECK: sub ; CHECK: cmp ; CHECK-NEXT: sbb %tmp532 = add i32 %j.4, %w diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll index 8b7200d2f7..a8d33f43da 100644 --- a/test/CodeGen/X86/mmx-builtins.ll +++ b/test/CodeGen/X86/mmx-builtins.ll @@ -1043,6 +1043,20 @@ entry: ret i64 %5 } +define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { +; CHECK: test21_2 +; CHECK: pshufw +; CHECK: movd +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <2 x i32> + %5 = extractelement <2 x i32> %4, i32 0 + ret i32 %5 +} + declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { diff --git a/test/CodeGen/X86/pr14204.ll b/test/CodeGen/X86/pr14204.ll new file mode 100644 index 0000000000..42e362bf3b --- /dev/null +++ b/test/CodeGen/X86/pr14204.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=core-avx2 | FileCheck %s + +; FIXME: vpmovsxwd should be generated instead of vpmovzxwd followed by +; SLL/SRA. + +define <8 x i32> @foo(<8 x i1> %bar) nounwind readnone { +entry: + %s = sext <8 x i1> %bar to <8 x i32> + ret <8 x i32> %s +; CHECK: foo +; CHECK: vpmovzxwd +; CHECK: vpslld +; CHECK: vpsrad +; CHECK: ret +} diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll new file mode 100644 index 0000000000..655f75800c --- /dev/null +++ b/test/CodeGen/X86/sse_partial_update.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s + +; rdar: 12558838 +; PR14221 +; There is a mismatch between the intrinsic and the actual instruction. +; The actual instruction has a partial update of dest, while the intrinsic +; passes through the upper FP values. Here, we make sure the source and +; destination of rsqrtss are the same. 
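+; +; Assumed rationale: because rsqrtss (and rcpss) write only the low element +; and keep the destination's upper elements, a destination register different +; from the source would leave the upper lanes (extracted below as +; %a.addr.4.extract) holding stale register contents instead of values from %a.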
+define void @t1(<4 x float> %a) nounwind uwtable ssp { +entry: +; CHECK: t1: +; CHECK: rsqrtss %xmm0, %xmm0 + %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind + %a.addr.0.extract = extractelement <4 x float> %0, i32 0 + %conv = fpext float %a.addr.0.extract to double + %a.addr.4.extract = extractelement <4 x float> %0, i32 1 + %conv3 = fpext float %a.addr.4.extract to double + tail call void @callee(double %conv, double %conv3) nounwind + ret void +} +declare void @callee(double, double) +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + +define void @t2(<4 x float> %a) nounwind uwtable ssp { +entry: +; CHECK: t2: +; CHECK: rcpss %xmm0, %xmm0 + %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind + %a.addr.0.extract = extractelement <4 x float> %0, i32 0 + %conv = fpext float %a.addr.0.extract to double + %a.addr.4.extract = extractelement <4 x float> %0, i32 1 + %conv3 = fpext float %a.addr.4.extract to double + tail call void @callee(double %conv, double %conv3) nounwind + ret void +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll index 8dfc2eab41..4c56f848de 100644 --- a/test/CodeGen/X86/vec_shuffle-26.ll +++ b/test/CodeGen/X86/vec_shuffle-26.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse41 | FileCheck %s -; RUN: llc < %s -march=x86 -mcpu=atom -mattr=+sse41 | FileCheck -check-prefix=ATOM %s +; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s ; Transpose example using the more generic vector shuffle. Return float8 ; instead of float16 @@ -47,8 +47,8 @@ entry: ; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) ; ATOM: lo_hi_shift ; ATOM: movhps ([[BASEREG:%[a-z]+]]), -; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) -; ATOM: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) +; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]]) %v.i = bitcast float* %y to <4 x float>* %0 = load <4 x float>* %v.i, align 1 %1 = bitcast float* %x to <1 x i64>* diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll index 1651c4cdac..f5f8842605 100644 --- a/test/CodeGen/X86/vec_shuffle-30.ll +++ b/test/CodeGen/X86/vec_shuffle-30.ll @@ -1,21 +1,25 @@ -; RUN: llc < %s -march=x86 -mattr=sse41 -o %t -; RUN: grep pshufhw %t | grep -- -95 | count 1 -; RUN: grep shufps %t | count 1 -; RUN: not grep pslldq %t +; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; CHECK: test ; Test case when creating pshufhw, we incorrectly set the higher order bit ; for an undef, define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind { entry: +; CHECK-NOT: vmovaps +; CHECK: vmovlpd +; CHECK: vpshufhw $-95 %0 = load <8 x i16>* %dest %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14> store <8 x i16> %1, <8 x i16>* %dest ret void -} +} +; CHECK: test2 ; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind { entry: +; CHECK-NOT: pslldq +; CHECK: shufps %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2> store <4 x i32> %0, <4 x i32>* %dest ret void diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll index 
ebdfea9a37..56c63644e0 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s
-; RUN: llc -march=x86 -mcpu=atom -mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s
+; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s

; CHECK: paddd
; CHECK: movl
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
new file mode 100644
index 0000000000..d666a2aa4a
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
@@ -0,0 +1,32 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+
+; This test checks that common symbols have been allocated addresses honouring
+; the alignment requirement.
+
+@CS1 = common global i32 0, align 16
+@CS2 = common global i8 0, align 1
+@CS3 = common global i32 0, align 16
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %ptr = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 ptrtoint (i32* @CS3 to i32), i32* %ptr, align 4
+  %0 = load i32* %ptr, align 4
+  %and = and i32 %0, 15
+  %tobool = icmp ne i32 %and, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i32 1, i32* %retval
+  br label %return
+
+if.else:                                          ; preds = %entry
+  store i32 0, i32* %retval
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %1 = load i32* %retval
+  ret i32 %1
+}
diff --git a/test/ExecutionEngine/MCJIT/test-data-align.ll b/test/ExecutionEngine/MCJIT/test-data-align.ll
new file mode 100644
index 0000000000..0493cba87f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-data-align.ll
@@ -0,0 +1,15 @@
+; RUN: %lli -mtriple=%mcjit_triple -use-mcjit -O0 %s
+
+; Check that a variable is always aligned as specified.
+
+@var = global i32 0, align 32
+define i32 @main() {
+  %addr = ptrtoint i32* @var to i64
+  %mask = and i64 %addr, 31
+  %tst = icmp eq i64 %mask, 0
+  br i1 %tst, label %good, label %bad
+good:
+  ret i32 0
+bad:
+  ret i32 1
+}
diff --git a/test/Feature/forceoptsize_attr.ll b/test/Feature/forceoptsize_attr.ll
deleted file mode 100644
index 89f2a5fcd8..0000000000
--- a/test/Feature/forceoptsize_attr.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-
-define void @test1() forcesizeopt {
-; CHECK: define void @test1() forcesizeopt
-  ret void
-}
-
diff --git a/test/Feature/minsize_attr.ll b/test/Feature/minsize_attr.ll
new file mode 100644
index 0000000000..51b133c4bd
--- /dev/null
+++ b/test/Feature/minsize_attr.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+define void @test1() minsize {
+; CHECK: define void @test1() minsize
+  ret void
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
new file mode 100644
index 0000000000..28d4ac0c0f
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll
@@ -0,0 +1,19 @@
+; This test checks that we are not instrumenting globals
+; that we created ourselves.
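; (Side note, not in the original file: the pass emits helper globals of
; its own, e.g. one private string constant describing the stack frame of
; @_Z3barv below; the CHECK/CHECK-NOT pair at the end asserts that exactly
; one such constant exists and that no further globals were created.)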
+; RUN: opt < %s -asan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @_Z3barv() uwtable address_safety {
+entry:
+  %a = alloca i32, align 4
+  call void @_Z3fooPi(i32* %a)
+  ret void
+}
+
+declare void @_Z3fooPi(i32*)
+; We create one global string constant for the stack frame above.
+; Make sure we don't create any other global constants.
+; CHECK: = private constant
+; CHECK-NOT: = private constant
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index 4ee34ce6b4..22e21da88e 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -259,8 +259,8 @@ _func:
@ CHECK: ldr r1, _foo @ encoding: [A,0x49]
@ fixup A - offset: 0, value: _foo, kind: fixup_arm_thumb_cp
-@ CHECK: ldr r3, #604 @ encoding: [0x97,0x4b]
-@ CHECK: ldr r3, #368 @ encoding: [0x5c,0x4b]
+@ CHECK: ldr r3, [pc, #604] @ encoding: [0x97,0x4b]
+@ CHECK: ldr r3, [pc, #368] @ encoding: [0x5c,0x4b]

@------------------------------------------------------------------------------
@ LDR (register)
diff --git a/test/MC/ARM/thumb2-b.w-encodingT4.s b/test/MC/ARM/thumb2-b.w-encodingT4.s
new file mode 100644
index 0000000000..be77b06267
--- /dev/null
+++ b/test/MC/ARM/thumb2-b.w-encodingT4.s
@@ -0,0 +1,12 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding < %s | FileCheck %s
+  .syntax unified
+  .globl _func
+.thumb_func _foo
+.space 0x37c6
+_foo:
+@------------------------------------------------------------------------------
+@ B (thumb2 b.w encoding T4) rdar://12585795
+@------------------------------------------------------------------------------
+  b.w 0x3680c
+
+@ CHECK: b.w #223244 @ encoding: [0x6d,0xf0,0x0c,0xb0]
diff --git a/test/MC/Disassembler/ARM/marked-up-thumb.txt b/test/MC/Disassembler/ARM/marked-up-thumb.txt
index d7807cde8b..65be28618b 100644
--- a/test/MC/Disassembler/ARM/marked-up-thumb.txt
+++ b/test/MC/Disassembler/ARM/marked-up-thumb.txt
@@ -1,5 +1,5 @@
# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -mdis < %s | FileCheck %s
-# CHECK: ldr <reg:r4>, <imm:#32>
+# CHECK: ldr <reg:r4>, <mem:[pc, <imm:#32>]>
0x08 0x4c
# CHECK: push {<reg:r1>, <reg:r2>, <reg:r7>}
0x86 0xb4
diff --git a/test/MC/Disassembler/ARM/thumb-printf.txt b/test/MC/Disassembler/ARM/thumb-printf.txt
index 8158a73edc..ca820444ad 100644
--- a/test/MC/Disassembler/ARM/thumb-printf.txt
+++ b/test/MC/Disassembler/ARM/thumb-printf.txt
@@ -7,17 +7,17 @@
# CHECK-NEXT: add r3, sp, #20
# CHECK-NEXT: ldr r5, [r3], #4
# CHECK-NEXT: str r3, [sp]
-# CHECK-NEXT: ldr r3, #52
+# CHECK-NEXT: ldr r3, [pc, #52]
# CHECK-NEXT: add r3, pc
# CHECK-NEXT: ldr r0, [r3]
# CHECK-NEXT: ldr r4, [r0]
-# CHECK-NEXT: ldr r0, #48
+# CHECK-NEXT: ldr r0, [pc, #48]
# CHECK-NEXT: add r0, pc
# CHECK-NEXT: ldr r0, [r0]
# CHECK-NEXT: ldr r0, [r0]
# CHECK-NEXT: blx #191548
# CHECK-NEXT: cbnz r0, #6
-# CHECK-NEXT: ldr r1, #40
+# CHECK-NEXT: ldr r1, [pc, #40]
# CHECK-NEXT: add r1, pc
# CHECK-NEXT: ldr r1, [r1]
# CHECK-NEXT: b #0
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index c08585a371..757ce6e397 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -30,7 +30,7 @@
# CHECK: ldm r0!, {r1}
0x02 0xc8

-# CHECK: ldr r5, #432
+# CHECK: ldr r5, [pc, #432]
0x6c 0x4d

# CHECK: str r0, [r3]
diff --git a/test/MC/Disassembler/ARM/thumb1.txt b/test/MC/Disassembler/ARM/thumb1.txt
index 5b70262310..de9596aab7 100644
--- a/test/MC/Disassembler/ARM/thumb1.txt
+++ b/test/MC/Disassembler/ARM/thumb1.txt
@@ -160,6 +160,7 @@
# CHECK: ldr r1, [sp]
# CHECK: ldr r2, [sp, #24]
# CHECK: ldr r3, [sp, #1020]
+# CHECK: ldr r1, [pc, #12]

0x29 0x68
@@ -168,6 +169,7 @@
0x00 0x99
0x06 0x9a
0xff 0x9b
+0x03 0x49

#------------------------------------------------------------------------------
# LDR (register)
diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt
index 42ebe58207..45dace3b09 100644
--- a/test/MC/Disassembler/ARM/thumb2.txt
+++ b/test/MC/Disassembler/ARM/thumb2.txt
@@ -169,6 +169,9 @@
0x13 0xf5 0xce 0xa9

+# CHECK: b.w #208962
+
+0x33 0xf0 0x21 0xb8 # rdar://12585795

#------------------------------------------------------------------------------
# BFC
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 095ed181ba..0a88c40839 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -3,7 +3,7 @@
# CHECK: daddiu $11, $26, 31949
0x67 0x4b 0x7c 0xcd
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
0x00 0x2b 0xd0 0x2d
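# A note on this hunk and the analogous ones below: general-purpose
# register 1 is the assembler temporary, written $at in assembly source.
# These test updates track the disassembler now printing it by number, so
# the same word decodes as, e.g., "lui $1, 1" where it used to print
# "lui $at, 1"; all four mips64*.txt files change in the same way.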
# CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@ # CHECK: dsllv $gp, $27, $24
0x03 0x1b 0xe0 0x14
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
0x00 0x01 0x0f 0xbb
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
0x03 0xc1 0x08 0x17
# CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@ # CHECK: dsubu $gp, $27, $24
0x03 0x78 0xe0 0x2f
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
0x8c 0x3b 0xc4 0xcd
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
0x3c 0x01 0x00 0x01
# CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@ # CHECK: lui $ra, 1
0x3c 0x1f 0x00 0x01
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
0xac 0x3a 0xc4 0xc9
# CHECK: ld $26, 3958($zero)
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt index c4e5591da4..fe8faffa83 100644 --- a/test/MC/Disassembler/Mips/mips64_le.txt +++ b/test/MC/Disassembler/Mips/mips64_le.txt @@ -3,7 +3,7 @@ # CHECK: daddiu $11, $26, 31949
0xcd 0x7c 0x4b 0x67
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
0x2d 0xd0 0x2b 0x00
# CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@ # CHECK: dsllv $gp, $27, $24
0x14 0xe0 0x1b 0x03
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
0xbb 0x0f 0x01 0x00
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
0x17 0x08 0xc1 0x03
# CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@ # CHECK: dsubu $gp, $27, $24
0x2f 0xe0 0x78 0x03
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
0xcd 0xc4 0x3b 0x8c
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
0x01 0x00 0x01 0x3c
# CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@ # CHECK: lui $ra, 1
0x01 0x00 0x1f 0x3c
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
0xc9 0xc4 0x3a 0xac
# CHECK: ld $26, 3958($zero)
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt index 41808c724e..2dfde0d231 100644 --- a/test/MC/Disassembler/Mips/mips64r2.txt +++ b/test/MC/Disassembler/Mips/mips64r2.txt @@ -3,7 +3,7 @@ # CHECK: daddiu $11, $26, 31949
0x67 0x4b 0x7c 0xcd
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
0x00 0x2b 0xd0 0x2d
# CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@ # CHECK: dsllv $gp, $27, $24
0x03 0x1b 0xe0 0x14
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
0x00 0x01 0x0f 0xbb
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
0x03 0xc1 0x08 0x17
# CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@ # CHECK: dsubu $gp, $27, $24
0x03 0x78 0xe0 0x2f
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
0x8c 0x3b 0xc4 0xcd
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
0x3c 0x01 0x00 0x01
# CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@ # CHECK: lui $ra, 1
0x3c 0x1f 0x00 0x01
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
0xac 0x3a 0xc4 0xc9
# CHECK: ld $26, 3958($zero)
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt index 4987f80af9..620d9ebe8d 100644 --- a/test/MC/Disassembler/Mips/mips64r2_le.txt +++ b/test/MC/Disassembler/Mips/mips64r2_le.txt @@ -3,7 +3,7 @@ # CHECK: daddiu $11, $26, 31949
0xcd 0x7c 0x4b 0x67
-# CHECK: daddu $26, $at, $11
+# CHECK: daddu $26, $1, $11
0x2d 0xd0 0x2b 0x00
# CHECK: ddiv $zero, $26, $22
@@ -30,10 +30,10 @@ # CHECK: dsllv $gp, $27, $24
0x14 0xe0 0x1b 0x03
-# CHECK: dsra $at, $at, 30
+# CHECK: dsra $1, $1, 30
0xbb 0x0f 0x01 0x00
-# CHECK: dsrav $at, $at, $fp
+# CHECK: dsrav $1, $1, $fp
0x17 0x08 0xc1 0x03
# CHECK: dsrl $10, $gp, 24
@@ -45,10 +45,10 @@ # CHECK: dsubu $gp, $27, $24
0x2f 0xe0 0x78 0x03
-# CHECK: lw $27, -15155($at)
+# CHECK: lw $27, -15155($1)
0xcd 0xc4 0x3b 0x8c
-# CHECK: lui $at, 1
+# CHECK: lui $1, 1
0x01 0x00 0x01 0x3c
# CHECK: lwu $3, -1746($3)
@@ -57,7 +57,7 @@ # CHECK: lui $ra, 1
0x01 0x00 0x1f 0x3c
-# CHECK: sw $26, -15159($at)
+# CHECK: sw $26, -15159($1)
0xc9 0xc4 0x3a 0xac
# CHECK: ld $26, 3958($zero)
diff --git a/test/MC/MachO/gen-dwarf-cpp.s b/test/MC/MachO/gen-dwarf-cpp.s new file mode 100644 index 0000000000..cb749f48ee --- /dev/null +++ b/test/MC/MachO/gen-dwarf-cpp.s @@ -0,0 +1,22 @@ +// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t +// RUN: llvm-dwarfdump %t | FileCheck %s + +# 100 "t.s" 1 +.globl _bar +_bar: + movl $0, %eax +L1: leave + ret + +// rdar://9275556 + +// We check that the source name "t.s" is picked up +// CHECK: Dir Mod Time File Len File Name +// CHECK: ---- ---------- ---------- --------------------------- +// CHECK: file_names[ 1] 1 0x00000000 0x00000000 gen-dwarf-cpp.s +// CHECK: file_names[ 2] 0 0x00000000 0x00000000 t.s + +// We check that the source line number 100 is picked up before the "movl" +// CHECK: Address Line Column File ISA Flags +// CHECK: ------------------ ------ ------ ------ --- ------------- +// CHECK: 0x0000000000000000 102 0 2 0 is_stmt diff --git a/test/MC/Markup/basic-markup.mc b/test/MC/Markup/basic-markup.mc new file mode 100644 index 0000000000..2fa5ebb28f --- /dev/null +++ b/test/MC/Markup/basic-markup.mc @@ -0,0 +1,16 @@ +// RUN: llvm-mcmarkup %s | FileCheck %s + + push {<reg:r1>, <reg:r2>, <reg:r7>} + sub <reg:sp>, <imm:#132> + ldr <reg:r0>, <mem:[<reg:r0>, <imm:#4>]> + + +// CHECK: reg +// CHECK: reg +// CHECK: reg +// CHECK: reg +// CHECK: imm +// CHECK: reg +// CHECK: mem +// CHECK: reg +// CHECK: imm diff --git a/test/MC/Markup/lit.local.cfg b/test/MC/Markup/lit.local.cfg new file mode 100644 index 0000000000..ab28eedae2 --- /dev/null +++ b/test/MC/Markup/lit.local.cfg @@ -0,0 +1,2 @@ +config.suffixes = ['.mc'] + diff --git a/test/MC/Mips/sext_64_32.ll b/test/MC/Mips/sext_64_32.ll index e5c57b8c41..9e0cfa01fd 100644 --- a/test/MC/Mips/sext_64_32.ll +++ b/test/MC/Mips/sext_64_32.ll @@ -2,7 +2,7 @@ ; Sign extend from 32 to 64 was creating nonsense opcodes -; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0 +; CHECK: sll ${{[a-z0-9]+}}, ${{[a-z0-9]+}}, 0 define i64 @foo(i32 %ival) nounwind readnone { entry: @@ -10,7 +10,7 @@ entry: ret i64 %conv } -; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 0 +; CHECK: dsll32 ${{[a-z0-9]+}}, ${{[a-z0-9]+}}, 0 define i64 @foo_2(i32 %ival_2) nounwind readnone { entry: diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s index a5e80b2c93..73d5878b41 100644 --- a/test/MC/X86/x86-32-ms-inline-asm.s +++ b/test/MC/X86/x86-32-ms-inline-asm.s @@ -8,3 +8,53 @@ mov [ebx].4, ecx // CHECK: movl %ecx, 4(%ebx) // CHECK: encoding: [0x89,0x4b,0x04] +_t21: ## @t21 +// CHECK: t21 + mov eax, [4*eax + 4] +// CHECK: movl 4(,%eax,4), %eax +// CHECK: # encoding: [0x8b,0x04,0x85,0x04,0x00,0x00,0x00] + mov eax, [4*eax][4] +// CHECK: movl 4(,%eax,4), %eax +// CHECK: # encoding: [0x8b,0x04,0x85,0x04,0x00,0x00,0x00] + + mov eax, [esi + eax] +// CHECK: movl (%esi,%eax), %eax +// CHECK: # encoding: [0x8b,0x04,0x06] + mov eax, [esi][eax] +// CHECK: movl (%esi,%eax), %eax +// CHECK: # encoding: [0x8b,0x04,0x06] + + mov eax, [esi + 4*eax] +// CHECK: movl (%esi,%eax,4), %eax +// CHECK: # encoding: [0x8b,0x04,0x86] + mov eax, [esi][4*eax] +// CHECK: movl (%esi,%eax,4), %eax +// CHECK: # encoding: [0x8b,0x04,0x86] + + mov eax, [esi + eax + 4] +// CHECK: movl 4(%esi,%eax), %eax +// CHECK: # encoding: [0x8b,0x44,0x06,0x04] + mov eax, [esi][eax + 4] +// CHECK: movl 4(%esi,%eax), %eax +// CHECK: # encoding: [0x8b,0x44,0x06,0x04] + mov eax, [esi + eax][4] +// CHECK: movl 4(%esi,%eax), %eax +// CHECK: # encoding: [0x8b,0x44,0x06,0x04] + mov eax, [esi][eax][4] +// CHECK: movl 4(%esi,%eax), %eax +// 
CHECK: # encoding: [0x8b,0x44,0x06,0x04] + + mov eax, [esi + 2*eax + 4] +// CHECK: movl 4(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x04] + mov eax, [esi][2*eax + 4] +// CHECK: movl 4(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x04] + mov eax, [esi + 2*eax][4] +// CHECK: movl 4(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x04] + mov eax, [esi][2*eax][4] +// CHECK: movl 4(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x04] + + ret diff --git a/test/Other/extract-alias.ll b/test/Other/extract-alias.ll new file mode 100644 index 0000000000..d5bab4b3f3 --- /dev/null +++ b/test/Other/extract-alias.ll @@ -0,0 +1,49 @@ +; RUN: llvm-extract -func foo -S < %s | FileCheck %s +; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s +; RUN: llvm-extract -alias zeda0 -S < %s | FileCheck --check-prefix=ALIAS %s +; RUN: llvm-extract -ralias .*bar -S < %s | FileCheck --check-prefix=ALIASRE %s + +; Both aliases should be converted to declarations +; CHECK: @zeda0 = external global i32 +; CHECK: define i32* @foo() { +; CHECK-NEXT: call void @a0bar() +; CHECK-NEXT: ret i32* @zeda0 +; CHECK-NEXT: } +; CHECK: declare void @a0bar() + +; DELETE: @zed = global i32 0 +; DELETE: @zeda0 = alias i32* @zed +; DELETE-NEXT: @a0foo = alias i32* ()* @foo +; DELETE-NEXT: @a0a0bar = alias void ()* @a0bar +; DELETE-NEXT: @a0bar = alias void ()* @bar +; DELETE: declare i32* @foo() +; DELETE: define void @bar() { +; DELETE-NEXT: %c = call i32* @foo() +; DELETE-NEXT: ret void +; DELETE-NEXT: } + +; ALIAS: @zed = external global i32 +; ALIAS: @zeda0 = alias i32* @zed + +; ALIASRE: @a0a0bar = alias void ()* @a0bar +; ALIASRE: @a0bar = alias void ()* @bar +; ALIASRE: declare void @bar() + +@zed = global i32 0 +@zeda0 = alias i32* @zed + +@a0foo = alias i32* ()* @foo + +define i32* @foo() { + call void @a0bar() + ret i32* @zeda0 +} + +@a0a0bar = alias void ()* @a0bar + +@a0bar = alias void ()* @bar + +define void @bar() { + %c = call i32* @foo() + ret void +} diff --git a/test/Other/extract-weak-odr.ll b/test/Other/extract-weak-odr.ll new file mode 100644 index 0000000000..6618f58436 --- /dev/null +++ b/test/Other/extract-weak-odr.ll @@ -0,0 +1,23 @@ +; RUN: llvm-extract -func foo -S < %s | FileCheck %s +; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s + +; Test that we don't convert weak_odr to external definitions. + +; CHECK: @bar = external global i32 +; CHECK: define weak_odr i32* @foo() { +; CHECK-NEXT: ret i32* @bar +; CHECK-NEXT: } + +; DELETE: @bar = weak_odr global i32 42 +; DELETE: declare i32* @foo() + +@bar = weak_odr global i32 42 + +define weak_odr i32* @foo() { + ret i32* @bar +} + +define void @g() { + %c = call i32* @foo() + ret void +} diff --git a/test/Other/extract.ll b/test/Other/extract.ll index 57573ed76f..8b0c835d57 100644 --- a/test/Other/extract.ll +++ b/test/Other/extract.ll @@ -7,18 +7,19 @@ ; llvm-extract uses lazy bitcode loading, so make sure it correctly reads ; from bitcode files in addition to assembly files. -; CHECK: define void @foo() { +; CHECK: define hidden void @foo() { ; CHECK: ret void ; CHECK: } -; The linkonce_odr linkage for foo() should be changed to external linkage. -; DELETE: declare void @foo() +; The private linkage for foo() should be changed to external linkage and +; hidden visibility added. 
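; (An illustrative contrast of the two modes, restating the CHECK/DELETE
; lines of this test: "llvm-extract -func foo" keeps @foo and rewrites it
; to "define hidden void @foo()", while "-delete" keeps the rest of the
; module and leaves "declare hidden void @foo()" behind; external linkage
; lets the two halves still link, and hidden visibility keeps the symbol
; out of the exported interface.)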
+; DELETE: declare hidden void @foo() ; DELETE: define void @bar() { ; DELETE: call void @foo() ; DELETE: ret void ; DELETE: } -define linkonce_odr void @foo() { +define private void @foo() { ret void } define void @bar() { diff --git a/test/Other/link-opts.ll b/test/Other/link-opts.ll new file mode 100644 index 0000000000..8e58ac8a56 --- /dev/null +++ b/test/Other/link-opts.ll @@ -0,0 +1,13 @@ +;RUN: opt -S -std-link-opts < %s | FileCheck %s +; Simple test to check that -std-link-opts keeps only the main function. + +; CHECK-NOT: define +; CHECK: define void @main +; CHECK-NOT: define +define void @main() { + ret void +} + +define void @foo() { + ret void +} diff --git a/test/Other/multi-pointer-size.ll b/test/Other/multi-pointer-size.ll deleted file mode 100644 index 95fa54b8f2..0000000000 --- a/test/Other/multi-pointer-size.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: opt -instcombine %s | llvm-dis | FileCheck %s -target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16--p4:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" - -define i32 @test_as0(i32 addrspace(0)* %A) { -entry: -; CHECK: %arrayidx = getelementptr i32* %A, i32 1 - %arrayidx = getelementptr i32 addrspace(0)* %A, i64 1 - %y = load i32 addrspace(0)* %arrayidx, align 4 - ret i32 %y -} - -define i32 @test_as1(i32 addrspace(1)* %A) { -entry: -; CHECK: %arrayidx = getelementptr i32 addrspace(1)* %A, i64 1 - %arrayidx = getelementptr i32 addrspace(1)* %A, i32 1 - %y = load i32 addrspace(1)* %arrayidx, align 4 - ret i32 %y -} - -define i32 @test_as2(i32 addrspace(2)* %A) { -entry: -; CHECK: %arrayidx = getelementptr i32 addrspace(2)* %A, i8 1 - %arrayidx = getelementptr i32 addrspace(2)* %A, i32 1 - %y = load i32 addrspace(2)* %arrayidx, align 4 - ret i32 %y -} - -define i32 @test_as3(i32 addrspace(3)* %A) { -entry: -; CHECK: %arrayidx = getelementptr i32 addrspace(3)* %A, i16 1 - %arrayidx = getelementptr i32 addrspace(3)* %A, i32 1 - %y = load i32 addrspace(3)* %arrayidx, align 4 - ret i32 %y -} - -define i32 @test_as4(i32 addrspace(4)* %A) { -entry: -; CHECK: %arrayidx = getelementptr i32 addrspace(4)* %A, i96 1 - %arrayidx = getelementptr i32 addrspace(4)* %A, i32 1 - %y = load i32 addrspace(4)* %arrayidx, align 4 - ret i32 %y -} - diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll new file mode 100644 index 0000000000..493f23b098 --- /dev/null +++ b/test/Transforms/BBVectorize/X86/loop1.ll @@ -0,0 +1,53 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL +; The second check covers the use of alias analysis (with loop unrolling). 
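; (A note on the two RUN lines: -bb-vectorize only pairs instructions that
; sit in the same basic block, so the un-unrolled loop body offers nothing
; to fuse and the first run must produce no <2 x double> operations; once
; -loop-unroll doubles the body, corresponding scalars from the two
; iterations can be paired, which the CHECK-UNRL lines verify.)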
+ +define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable { +entry: + br label %for.body +; CHECK: @test1 +; CHECK-UNRL: @test1 + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv + %1 = load double* %arrayidx2, align 8 + %mul = fmul double %0, %0 + %mul3 = fmul double %0, %1 + %add = fadd double %mul, %mul3 + %add4 = fadd double %1, %1 + %add5 = fadd double %add4, %0 + %mul6 = fmul double %0, %add5 + %add7 = fadd double %add, %mul6 + %mul8 = fmul double %1, %1 + %add9 = fadd double %0, %0 + %add10 = fadd double %add9, %0 + %mul11 = fmul double %mul8, %add10 + %add12 = fadd double %add7, %mul11 + %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv + store double %add12, double* %arrayidx14, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10 + br i1 %exitcond, label %for.end, label %for.body +; CHECK-NOT: <2 x double> +; CHECK-UNRL: %mul = fmul <2 x double> %2, %2 +; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3 +; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3 +; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3 +; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2 +; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5 +; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6 +; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3 +; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2 +; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2 +; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10 +; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11 + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/test/Transforms/BBVectorize/X86/simple-ldstr.ll new file mode 100644 index 0000000000..0124399bad --- /dev/null +++ b/test/Transforms/BBVectorize/X86/simple-ldstr.ll @@ -0,0 +1,29 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s + +; Simple 3-pair chain with loads and stores +define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + store double %mul, double* %c, align 8 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + ret void +; CHECK: @test1 +; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>* +; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>* +; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8 +; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8 +; CHECK: %mul = fmul <2 x double> %i0, %i1 +; CHECK: %0 = bitcast double* %c to <2 x double>* +; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8 +; CHECK: ret void +} + diff --git a/test/Transforms/BBVectorize/X86/simple.ll 
b/test/Transforms/BBVectorize/X86/simple.ll new file mode 100644 index 0000000000..0113e38bb1 --- /dev/null +++ b/test/Transforms/BBVectorize/X86/simple.ll @@ -0,0 +1,103 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s + +; Basic depth-3 chain +define double @test1(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %R = fmul double %Z1, %Z2 + ret double %R +; CHECK: @test1 +; CHECK-NOT: fmul <2 x double> +; CHECK: ret double %R +} + +; Basic chain +define double @test1a(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y1, %B1 + %Z2 = fadd double %Y2, %B2 + %W1 = fadd double %Y1, %Z1 + %W2 = fadd double %Y2, %Z2 + %V1 = fadd double %W1, %Z1 + %V2 = fadd double %W2, %Z2 + %Q1 = fadd double %W1, %V1 + %Q2 = fadd double %W2, %V2 + %S1 = fadd double %W1, %Q1 + %S2 = fadd double %W2, %Q2 + %R = fmul double %S1, %S2 + ret double %R +; CHECK: @test1a +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 +; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 +; CHECK: %W1 = fadd <2 x double> %Y1, %Z1 +; CHECK: %V1 = fadd <2 x double> %W1, %Z1 +; CHECK: %Q1 = fadd <2 x double> %W1, %V1 +; CHECK: %S1 = fadd <2 x double> %W1, %Q1 +; CHECK: %S1.v.r1 = extractelement <2 x double> %S1, i32 0 +; CHECK: %S1.v.r2 = extractelement <2 x double> %S1, i32 1 +; CHECK: %R = fmul double %S1.v.r1, %S1.v.r2 +; CHECK: ret double %R +} + +; Basic depth-3 chain (last pair permuted) +define double @test2(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %R = fmul double %Z1, %Z2 + ret double %R +; CHECK: @test2 +; CHECK-NOT: fmul <2 x double> +; CHECK: ret double %R +} + +; Basic depth-4 chain (internal permutation) +define double @test4(double %A1, double %A2, double %B1, double %B2) { + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 + %Z1 = fadd double %Y2, %B1 + %Z2 = fadd double %Y1, %B2 + %W1 = fadd double %Y2, %Z1 + %W2 = fadd double %Y1, %Z2 + %R = fmul double %Z1, %Z2 + ret double %R +; CHECK: @test4 +; CHECK-NOT: fmul <2 x double> +; CHECK: ret double %R +} + +; Basic chain with shuffles +define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { + %X1 = sub <8 x i8> %A1, %B1 + %X2 = sub <8 x i8> %A2, %B2 + %Y1 = mul <8 x i8> %X1, %A1 + %Y2 = mul <8 x i8> %X2, %A2 + %Z1 = add <8 x i8> %Y1, %B1 + %Z2 = add <8 x i8> %Y2, %B2 + %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3> + 
%Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1> + %R = mul <8 x i8> %Q1, %Q2 + ret <8 x i8> %R +; CHECK: @test6 +; CHECK-NOT: sub <16 x i8> +; CHECK: ret <8 x i8> +} + diff --git a/test/Transforms/BBVectorize/X86/vs-cast.ll b/test/Transforms/BBVectorize/X86/vs-cast.ll new file mode 100644 index 0000000000..be3efca925 --- /dev/null +++ b/test/Transforms/BBVectorize/X86/vs-cast.ll @@ -0,0 +1,12 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s + +define void @main() nounwind uwtable { +entry: + %0 = bitcast <2 x i64> undef to i128 + %1 = bitcast <2 x i64> undef to i128 + ret void +; CHECK: @main +} + diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll index 32a91ceee0..e8e82ce024 100644 --- a/test/Transforms/BBVectorize/cycle.ll +++ b/test/Transforms/BBVectorize/cycle.ll @@ -107,6 +107,6 @@ done: ret void ; CHECK: @test1 ; CHECK: go: -; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0 +; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0 ; FIXME: When tree pruning is deterministic, include the entire output. } diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg index 19eebc0ac7..a8ad0f1a28 100644 --- a/test/Transforms/BBVectorize/lit.local.cfg +++ b/test/Transforms/BBVectorize/lit.local.cfg @@ -1 +1,6 @@ config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll index bebc91ad91..c22ea5852a 100644 --- a/test/Transforms/BBVectorize/loop1.ll +++ b/test/Transforms/BBVectorize/loop1.ll @@ -42,8 +42,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: %mul = fmul double %0, %0 ; CHECK: %mul3 = fmul double %0, %1 ; CHECK: %add = fadd double %mul, %mul3 -; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %mul8 = fmul double %1, %1 +; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1 ; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2 ; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0 diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll index d9945b5630..aeaf98865b 100644 --- a/test/Transforms/BBVectorize/search-limit.ll +++ b/test/Transforms/BBVectorize/search-limit.ll @@ -7,8 +7,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK-SL4: @test1 ; CHECK-SL4-NOT: <2 x double> ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll index 6844977143..ae1d63bfd8 
100644 --- a/test/Transforms/BBVectorize/simple-int.ll +++ b/test/Transforms/BBVectorize/simple-int.ll @@ -17,8 +17,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, ret double %R ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 @@ -43,8 +43,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { ret double %R ; CHECK: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1) @@ -68,8 +68,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) { ret double %R ; CHECK: @test3 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P) diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll index 6294543cd8..d46f7692b6 100644 --- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll +++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll @@ -2,6 +2,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s ; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO +; FIXME: re-enable this once pointer vectors work properly +; XFAIL: * + ; Simple 3-pair chain also with loads and stores (using ptrs and gep) define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly { entry: diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll index c1e6a09eea..7dd77c933f 100644 --- a/test/Transforms/BBVectorize/simple-ldstr.ll +++ b/test/Transforms/BBVectorize/simple-ldstr.ll @@ -94,13 +94,13 @@ entry: ; CHECK-AO: @test3 ; CHECK-AO: %i0 = load double* %a, align 8 ; CHECK-AO: %i1 = load double* %b, align 8 -; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0 -; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0 ; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1 ; CHECK-AO: %i3 = load double* %arrayidx3, align 8 ; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1 ; 
CHECK-AO: %i4 = load double* %arrayidx4, align 8 +; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0 ; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1 +; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0 ; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1 ; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2 ; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float> @@ -140,3 +140,31 @@ if.end: ; CHECK-AO-NOT: <2 x double> } +; Simple 3-pair chain with loads and stores +define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly { +entry: + %i0 = load double* %a, align 8 + %i1 = load double* %b, align 8 + %mul = fmul double %i0, %i1 + %arrayidx3 = getelementptr inbounds double* %a, i64 1 + %i3 = load double* %arrayidx3, align 8 + %arrayidx4 = getelementptr inbounds double* %b, i64 1 + %i4 = load double* %arrayidx4, align 8 + %mul5 = fmul double %i3, %i4 + %arrayidx5 = getelementptr inbounds double* %c, i64 1 + store double %mul5, double* %arrayidx5, align 8 + store double %mul, double* %c, align 4 + ret void +; CHECK: @test5 +; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>* +; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>* +; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8 +; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8 +; CHECK: %mul = fmul <2 x double> %i0, %i1 +; CHECK: %0 = bitcast double* %c to <2 x double>* +; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4 +; CHECK: ret void +; CHECK-AO: @test5 +; CHECK-AO-NOT: <2 x double> +} + diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll index 325792a5dc..15ecb59702 100644 --- a/test/Transforms/BBVectorize/simple-sel.ll +++ b/test/Transforms/BBVectorize/simple-sel.ll @@ -6,8 +6,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) { ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -33,8 +33,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test2 ; CHECK-NB: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index 88eb9c90f7..d9a12eebed 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: 
%X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -29,8 +29,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) { define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -40,12 +40,13 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 %Z1 = fadd double %Y2, %B1 %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 +; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0 +; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1 +; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2 %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 +; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0 +; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1 +; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1 ret double %R ; CHECK: ret double %R } @@ -54,8 +55,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { define double @test3(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test3 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -79,8 +80,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2) { define double @test4(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test4 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -148,4 +149,27 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK: ret <8 x i8> %R } +; Basic depth-3 chain (flipped order) +define double @test7(double %A1, double %A2, double %B1, double %B2) { +; CHECK: @test7 +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; 
CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 + %Z2 = fadd double %Y2, %B2 + %Z1 = fadd double %Y1, %B1 +; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 + %R = fmul double %Z1, %Z2 +; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 +; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 + ret double %R +; CHECK: ret double %R +} diff --git a/test/Transforms/IndVarSimplify/verify-scev.ll b/test/Transforms/IndVarSimplify/verify-scev.ll new file mode 100644 index 0000000000..019f5830d5 --- /dev/null +++ b/test/Transforms/IndVarSimplify/verify-scev.ll @@ -0,0 +1,421 @@ +; RUN: opt < %s -S -indvars -verify-scev +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +define void @test1() nounwind uwtable ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + br i1 false, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + br i1 undef, label %for.end11, label %for.body3 + +for.body3: ; preds = %for.end + unreachable + +for.end11: ; preds = %for.end + br i1 undef, label %while.body, label %while.end + +while.body: ; preds = %for.end11 + unreachable + +while.end: ; preds = %for.end11 + br i1 undef, label %if.end115, label %for.cond109 + +for.cond109: ; preds = %while.end + unreachable + +if.end115: ; preds = %while.end + br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612 + +while.body119.lr.ph.lr.ph: ; preds = %if.end115 + br i1 undef, label %for.cond612, label %if.end123.us + +if.end123.us: ; preds = %while.body119.lr.ph.lr.ph + br label %for.cond132.us + +for.cond132.us: ; preds = %for.cond132.us, %if.end123.us + br i1 undef, label %if.then136.us, label %for.cond132.us + +if.then136.us: ; preds = %for.cond132.us + br i1 undef, label %while.end220, label %while.body211 + +while.body211: ; preds = %while.body211, %if.then136.us + br i1 undef, label %while.end220, label %while.body211 + +while.end220: ; preds = %while.body211, %if.then136.us + br label %for.cond246.outer + +for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220 + br label %for.cond246 + +for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer + br i1 undef, label %for.end562, label %if.end250 + +if.end250: ; preds = %for.cond246 + br i1 undef, label %if.end256, label %for.end562 + +if.end256: ; preds = %if.end250 + %cmp272 = icmp eq i32 undef, undef + br i1 %cmp272, label %if.then274, label %for.cond404.preheader + +for.cond404.preheader: ; preds = %if.end256 + br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph + +for.body409.lr.ph: ; preds = %for.cond404.preheader + br label %for.body409 + +if.then274: ; preds = %if.end256 + br i1 undef, label %for.cond246.outer, label %if.end309 + +if.end309: ; preds = %if.then274 + br i1 undef, label %for.cond372.loopexit, label %for.body361 + +for.body361: ; preds = %for.body361, %if.end309 + br i1 undef, label %for.cond372.loopexit, label %for.body361 + +for.cond372.loopexit: ; preds = 
%for.body361, %if.end309 + br i1 undef, label %for.cond394.preheader, label %for.cond246 + +for.cond394.preheader: ; preds = %for.cond372.loopexit + br i1 undef, label %for.cond246.outer, label %for.body397 + +for.body397: ; preds = %for.cond394.preheader + unreachable + +for.body409: ; preds = %for.inc558, %for.body409.lr.ph + %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ] + br i1 undef, label %if.then412, label %if.else433 + +if.then412: ; preds = %for.body409 + br label %if.end440 + +if.else433: ; preds = %for.body409 + br label %if.end440 + +if.end440: ; preds = %if.else433, %if.then412 + br i1 undef, label %for.inc558, label %if.end461 + +if.end461: ; preds = %if.end440 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.body517: ; preds = %for.body517, %if.end461 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.cond528.loopexit: ; preds = %for.body517, %if.end461 + br label %for.inc558 + +for.inc558: ; preds = %for.cond528.loopexit, %if.end440 + %inc559 = add nsw i32 %k.029, 1 + %cmp407 = icmp sgt i32 %inc559, undef + br i1 %cmp407, label %for.cond246.outer, label %for.body409 + +for.end562: ; preds = %if.end250, %for.cond246 + unreachable + +for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115 + unreachable +} + +define void @test2() nounwind uwtable ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + br i1 undef, label %for.end11, label %for.body3 + +for.body3: ; preds = %for.end + unreachable + +for.end11: ; preds = %for.end + br i1 undef, label %while.body, label %while.end + +while.body: ; preds = %for.end11 + unreachable + +while.end: ; preds = %for.end11 + br i1 undef, label %if.end115, label %for.cond109 + +for.cond109: ; preds = %while.end + unreachable + +if.end115: ; preds = %while.end + br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612 + +while.body119.lr.ph.lr.ph: ; preds = %if.end115 + br i1 undef, label %for.cond612, label %if.end123.us + +if.end123.us: ; preds = %while.body119.lr.ph.lr.ph + br label %for.cond132.us + +for.cond132.us: ; preds = %for.cond132.us, %if.end123.us + br i1 undef, label %if.then136.us, label %for.cond132.us + +if.then136.us: ; preds = %for.cond132.us + br i1 undef, label %while.end220, label %while.body211 + +while.body211: ; preds = %while.body211, %if.then136.us + br i1 undef, label %while.end220, label %while.body211 + +while.end220: ; preds = %while.body211, %if.then136.us + br label %for.cond246.outer + +for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220 + br label %for.cond246 + +for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer + br i1 undef, label %for.end562, label %if.end250 + +if.end250: ; preds = %for.cond246 + br i1 undef, label %if.end256, label %for.end562 + +if.end256: ; preds = %if.end250 + %0 = load i32* undef, align 4 + br i1 undef, label %if.then274, label %for.cond404.preheader + +for.cond404.preheader: ; preds = %if.end256 + %add406 = add i32 0, %0 + br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph + +for.body409.lr.ph: ; preds = %for.cond404.preheader + br label %for.body409 + +if.then274: ; preds = %if.end256 + br i1 undef, label %for.cond246.outer, label %if.end309 + +if.end309: ; preds = %if.then274 + br i1 undef, label %for.cond372.loopexit, label %for.body361 + +for.body361: ; preds = %for.body361, 
%if.end309 + br i1 undef, label %for.cond372.loopexit, label %for.body361 + +for.cond372.loopexit: ; preds = %for.body361, %if.end309 + br i1 undef, label %for.cond394.preheader, label %for.cond246 + +for.cond394.preheader: ; preds = %for.cond372.loopexit + br i1 undef, label %for.cond246.outer, label %for.body397 + +for.body397: ; preds = %for.cond394.preheader + unreachable + +for.body409: ; preds = %for.inc558, %for.body409.lr.ph + %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ] + br i1 undef, label %if.then412, label %if.else433 + +if.then412: ; preds = %for.body409 + br label %if.end440 + +if.else433: ; preds = %for.body409 + br label %if.end440 + +if.end440: ; preds = %if.else433, %if.then412 + br i1 undef, label %for.inc558, label %if.end461 + +if.end461: ; preds = %if.end440 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.body517: ; preds = %for.body517, %if.end461 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.cond528.loopexit: ; preds = %for.body517, %if.end461 + br label %for.inc558 + +for.inc558: ; preds = %for.cond528.loopexit, %if.end440 + %inc559 = add nsw i32 %k.029, 1 + %cmp407 = icmp sgt i32 %inc559, %add406 + br i1 %cmp407, label %for.cond246.outer, label %for.body409 + +for.end562: ; preds = %if.end250, %for.cond246 + unreachable + +for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115 + unreachable +} + +define void @test3() nounwind uwtable ssp { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + br i1 undef, label %for.end11, label %for.body3 + +for.body3: ; preds = %for.end + unreachable + +for.end11: ; preds = %for.end + br i1 undef, label %while.body, label %while.end + +while.body: ; preds = %for.end11 + unreachable + +while.end: ; preds = %for.end11 + br i1 undef, label %if.end115, label %for.cond109 + +for.cond109: ; preds = %while.end + unreachable + +if.end115: ; preds = %while.end + br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612 + +while.body119.lr.ph.lr.ph: ; preds = %if.end115 + br i1 undef, label %for.cond612, label %if.end123.us + +if.end123.us: ; preds = %while.body119.lr.ph.lr.ph + br label %for.cond132.us + +for.cond132.us: ; preds = %for.cond132.us, %if.end123.us + br i1 undef, label %if.then136.us, label %for.cond132.us + +if.then136.us: ; preds = %for.cond132.us + br i1 undef, label %while.end220, label %while.body211 + +while.body211: ; preds = %while.body211, %if.then136.us + br i1 undef, label %while.end220, label %while.body211 + +while.end220: ; preds = %while.body211, %if.then136.us + br label %for.cond246.outer + +for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220 + br label %for.cond246 + +for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer + br i1 undef, label %for.end562, label %if.end250 + +if.end250: ; preds = %for.cond246 + br i1 undef, label %if.end256, label %for.end562 + +if.end256: ; preds = %if.end250 + br i1 undef, label %if.then274, label %for.cond404.preheader + +for.cond404.preheader: ; preds = %if.end256 + br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph + +for.body409.lr.ph: ; preds = %for.cond404.preheader + br label %for.body409 + +if.then274: ; preds = %if.end256 + br i1 undef, label %for.cond246.outer, label %if.end309 + +if.end309: ; preds = %if.then274 + br i1 undef, label %for.cond372.loopexit, label 
%for.body361 + +for.body361: ; preds = %for.body361, %if.end309 + br i1 undef, label %for.cond372.loopexit, label %for.body361 + +for.cond372.loopexit: ; preds = %for.body361, %if.end309 + br i1 undef, label %for.cond394.preheader, label %for.cond246 + +for.cond394.preheader: ; preds = %for.cond372.loopexit + br i1 undef, label %for.cond246.outer, label %for.body397 + +for.body397: ; preds = %for.cond394.preheader + unreachable + +for.body409: ; preds = %for.inc558, %for.body409.lr.ph + br i1 undef, label %if.then412, label %if.else433 + +if.then412: ; preds = %for.body409 + br label %if.end440 + +if.else433: ; preds = %for.body409 + br label %if.end440 + +if.end440: ; preds = %if.else433, %if.then412 + br i1 undef, label %for.inc558, label %if.end461 + +if.end461: ; preds = %if.end440 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.body517: ; preds = %for.body517, %if.end461 + br i1 undef, label %for.cond528.loopexit, label %for.body517 + +for.cond528.loopexit: ; preds = %for.body517, %if.end461 + br label %for.inc558 + +for.inc558: ; preds = %for.cond528.loopexit, %if.end440 + br i1 undef, label %for.cond246.outer, label %for.body409 + +for.end562: ; preds = %if.end250, %for.cond246 + unreachable + +for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115 + unreachable +} + +define void @test4() nounwind uwtable ssp { +entry: + br i1 undef, label %if.end8, label %if.else + +if.else: ; preds = %entry + br label %if.end8 + +if.end8: ; preds = %if.else, %entry + br i1 undef, label %if.end26, label %if.else22 + +if.else22: ; preds = %if.end8 + br label %if.end26 + +if.end26: ; preds = %if.else22, %if.end8 + br i1 undef, label %if.end35, label %if.else31 + +if.else31: ; preds = %if.end26 + br label %if.end35 + +if.end35: ; preds = %if.else31, %if.end26 + br i1 undef, label %for.end226, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.end35 + br label %for.body48 + +for.body48: ; preds = %for.inc221, %for.body.lr.ph + br i1 undef, label %for.inc221, label %for.body65.lr.ph + +for.body65.lr.ph: ; preds = %for.body48 + %0 = load i32* undef, align 4 + br label %for.body65.us + +for.body65.us: ; preds = %for.inc219.us, %for.body65.lr.ph + %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ] + %idxprom66.us = sext i32 %k.09.us to i64 + br i1 undef, label %for.inc219.us, label %if.end72.us + +if.end72.us: ; preds = %for.body65.us + br i1 undef, label %if.end93.us, label %if.then76.us + +if.then76.us: ; preds = %if.end72.us + br label %if.end93.us + +if.end93.us: ; preds = %if.then76.us, %if.end72.us + br i1 undef, label %if.end110.us, label %for.inc219.us + +if.end110.us: ; preds = %if.end93.us + br i1 undef, label %for.inc219.us, label %for.body142.us + +for.body142.us: ; preds = %for.cond139.loopexit.us, %if.end110.us + br label %for.cond152.us + +for.cond152.us: ; preds = %for.cond152.us, %for.body142.us + br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us + +for.inc219.us: ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us + %inc.us = add nsw i32 %k.09.us, 1 + %cmp64.us = icmp sgt i32 %inc.us, %0 + br i1 %cmp64.us, label %for.inc221, label %for.body65.us + +for.cond139.loopexit.us: ; preds = %for.cond152.us + br i1 undef, label %for.inc219.us, label %for.body142.us + +for.inc221: ; preds = %for.inc219.us, %for.body48 + br label %for.body48 + +for.end226: ; preds = %if.end35 + ret void +} diff --git a/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll 
b/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll new file mode 100644 index 0000000000..20ea282687 --- /dev/null +++ b/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll @@ -0,0 +1,51 @@ +; RUN: opt < %s -instcombine -S + +; Make sure that we don't crash when optimizing the vectors of pointers. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.hoge = type { double*, double*, double*, double** } + +define void @widget(%struct.hoge* nocapture %arg) nounwind uwtable ssp { +bb: + %tmp = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 0 + br i1 undef, label %bb1, label %bb17 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb17 + +bb3: ; preds = %bb1 + %tmp4 = bitcast double** %tmp to <2 x double*>* + %tmp5 = load <2 x double*>* %tmp4, align 8 + %tmp6 = ptrtoint <2 x double*> %tmp5 to <2 x i64> + %tmp7 = sub <2 x i64> zeroinitializer, %tmp6 + %tmp8 = ashr exact <2 x i64> %tmp7, <i64 3, i64 3> + %tmp9 = extractelement <2 x i64> %tmp8, i32 0 + %tmp10 = add nsw i64 undef, %tmp9 + br i1 undef, label %bb11, label %bb12 + +bb11: ; preds = %bb3 + br label %bb13 + +bb12: ; preds = %bb3 + br label %bb13 + +bb13: ; preds = %bb12, %bb11 + br i1 undef, label %bb16, label %bb14 + +bb14: ; preds = %bb13 + br i1 undef, label %bb16, label %bb15 + +bb15: ; preds = %bb14 + br label %bb16 + +bb16: ; preds = %bb15, %bb14, %bb13 + unreachable + +bb17: ; preds = %bb2, %bb + ret void +} diff --git a/test/Transforms/InstCombine/constant-fold-gep-as-0.ll b/test/Transforms/InstCombine/constant-fold-gep-as-0.ll deleted file mode 100644 index 74fe316137..0000000000 --- a/test/Transforms/InstCombine/constant-fold-gep-as-0.ll +++ /dev/null @@ -1,235 +0,0 @@ -; "PLAIN" - No optimizations. This tests the target-independent -; constant folder. -; RUN: opt -S -o - < %s | FileCheck --check-prefix=PLAIN %s - -target datalayout = "e-p:128:128:128-p1:32:32:32-p2:8:8:8-p3:16:16:16-p4:64:64:64-p5:96:96:96-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32" - -; PLAIN: ModuleID = '<stdin>' - -; The automatic constant folder in opt does not have targetdata access, so -; it can't fold gep arithmetic, in general. However, the constant folder run -; from instcombine and global opt can use targetdata. 
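The "sizeof" idiom the deleted globals below exercise computes a type's size by indexing one element past a null pointer and converting the result to an integer. A minimal sketch of the idiom, not taken from the deleted file:

    @size_of_double = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) ; folds to 8 once targetdata is known

Without targetdata the folder must leave such expressions symbolic, since neither the element size nor the pointer width is known; with targetdata they collapse to plain integer constants.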
-; PLAIN: @G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) -@G8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) -; PLAIN: @G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) -@G1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -1) -; PLAIN: @F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) -@F8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) -; PLAIN: @F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) -@F1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 1 to i1 addrspace(2)*), i8 -2) -; PLAIN: @H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) -@H8 = global i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) -; PLAIN: @H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i8 -1) -@H1 = global i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i8 0 to i1 addrspace(2)*), i8 -1) - - -; The target-independent folder should be able to do some clever -; simplifications on sizeof, alignof, and offsetof expressions. The -; target-dependent folder should fold these down to constants. -; PLAIN-X: @a = constant i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) -@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]} addrspace(4)* getelementptr ({[7 x double], [7 x double]} addrspace(4)* null, i64 11) to i64), i64 5)) - -; PLAIN-X: @b = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) -@b = constant i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) - -; PLAIN-X: @c = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) -@c = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) - -; PLAIN-X: @d = constant i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) -@d = constant i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) - -; PLAIN-X: @e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) -@e = constant i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) - -; PLAIN-X: @f = constant i64 1 -@f = constant i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) - -; PLAIN-X: @g = constant i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) -@g = constant i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) - -; PLAIN-X: @h = constant i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) -@h = constant i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i64 1) to i64) - -; 
PLAIN-X: @i = constant i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) -@i = constant i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double} addrspace(4)* null, i64 0, i32 1) to i64) - -; The target-dependent folder should cast GEP indices to integer-sized pointers. - -; PLAIN: @M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1) -; PLAIN: @N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1) -; PLAIN: @O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1) - -@M = constant i64 addrspace(5)* getelementptr (i64 addrspace(5)* null, i32 1) -@N = constant i64 addrspace(5)* getelementptr ({ i64, i64 } addrspace(5)* null, i32 0, i32 1) -@O = constant i64 addrspace(5)* getelementptr ([2 x i64] addrspace(5)* null, i32 0, i32 1) - -; Fold GEP of a GEP. Very simple cases are folded. - -; PLAIN-X: @Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 2) -@ext = external addrspace(3) global [3 x { i32, i32 }] -@Y = global [3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 1), i64 1) - -; PLAIN-X: @Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) -@Z = global i32addrspace(3)* getelementptr inbounds (i32addrspace(3)* getelementptr inbounds ([3 x { i32, i32 }]addrspace(3)* @ext, i64 0, i64 1, i32 0), i64 1) - - -; Duplicate all of the above as function return values rather than -; global initializers. - -; PLAIN: define i8 addrspace(1)* @goo8() nounwind { -; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* -; PLAIN: ret i8 addrspace(1)* %t -; PLAIN: } -; PLAIN: define i1 addrspace(2)* @goo1() nounwind { -; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* -; PLAIN: ret i1 addrspace(2)* %t -; PLAIN: } -; PLAIN: define i8 addrspace(1)* @foo8() nounwind { -; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* -; PLAIN: ret i8 addrspace(1)* %t -; PLAIN: } -; PLAIN: define i1 addrspace(2)* @foo1() nounwind { -; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* -; PLAIN: ret i1 addrspace(2)* %t -; PLAIN: } -; PLAIN: define i8 addrspace(1)* @hoo8() nounwind { -; PLAIN: %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* null, i32 -1) to i8 addrspace(1)* -; PLAIN: ret i8 addrspace(1)* %t -; PLAIN: } -; PLAIN: define i1 addrspace(2)* @hoo1() nounwind { -; PLAIN: %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 -1) to i1 addrspace(2)* -; PLAIN: ret i1 addrspace(2)* %t -; PLAIN: } -define i8 addrspace(1)* @goo8() nounwind { - %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* - ret i8 addrspace(1)* %t -} -define i1 addrspace(2)* @goo1() nounwind { - %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* - ret i1 addrspace(2)* %t -} -define i8 addrspace(1)* @foo8() nounwind { - %t = 
bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 1 to i8 addrspace(1)*), i32 -2) to i8 addrspace(1)* - ret i8 addrspace(1)* %t -} -define i1 addrspace(2)* @foo1() nounwind { - %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 1 to i1 addrspace(2)*), i32 -2) to i1 addrspace(2)* - ret i1 addrspace(2)* %t -} -define i8 addrspace(1)* @hoo8() nounwind { - %t = bitcast i8 addrspace(1)* getelementptr (i8 addrspace(1)* inttoptr (i32 0 to i8 addrspace(1)*), i32 -1) to i8 addrspace(1)* - ret i8 addrspace(1)* %t -} -define i1 addrspace(2)* @hoo1() nounwind { - %t = bitcast i1 addrspace(2)* getelementptr (i1 addrspace(2)* inttoptr (i32 0 to i1 addrspace(2)*), i32 -1) to i1 addrspace(2)* - ret i1 addrspace(2)* %t -} - -; PLAIN-X: define i64 @fa() nounwind { -; PLAIN-X: %t = bitcast i64 mul (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2310) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fb() nounwind { -; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fc() nounwind { -; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 2) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fd() nounwind { -; PLAIN-X: %t = bitcast i64 mul nuw (i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64), i64 11) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fe() nounwind { -; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @ff() nounwind { -; PLAIN-X: %t = bitcast i64 1 to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fg() nounwind { -; PLAIN-X: %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fh() nounwind { -; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr (i1 addrspace(2)* null, i32 1) to i64) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -; PLAIN-X: define i64 @fi() nounwind { -; PLAIN-X: %t = bitcast i64 ptrtoint (i1 addrspace(2)* getelementptr ({ i1, i1 addrspace(2)* }* null, i64 0, i32 1) to i64) to i64 -; PLAIN-X: ret i64 %t -; PLAIN-X: } -define i64 @fa() nounwind { - %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 - ret i64 %t -} -define i64 @fb() nounwind { - %t = bitcast i64 ptrtoint ([13 x double] addrspace(4)* getelementptr ({i1, [13 x double]} addrspace(4)* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fc() nounwind { - %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, double, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 - ret i64 %t -} -define i64 @fd() nounwind { - %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ([13 x double] addrspace(4)* null, i64 0, i32 11) to i64) to i64 - ret i64 %t -} -define i64 @fe() nounwind { - %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({double, float, double, double} addrspace(4)* null, i64 0, i32 2) to i64) to i64 - ret i64 %t -} -define i64 @ff() 
nounwind { - %t = bitcast i64 ptrtoint (<{ i16, i128 }> addrspace(4)* getelementptr ({i1, <{ i16, i128 }>} addrspace(4)* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fg() nounwind { - %t = bitcast i64 ptrtoint ({double, double} addrspace(4)* getelementptr ({i1, {double, double}} addrspace(4)* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fh() nounwind { - %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr (double addrspace(4)* null, i32 1) to i64) to i64 - ret i64 %t -} -define i64 @fi() nounwind { - %t = bitcast i64 ptrtoint (double addrspace(4)* getelementptr ({i1, double}addrspace(4)* null, i64 0, i32 1) to i64) to i64 - ret i64 %t -} - -; PLAIN: define i64* @fM() nounwind { -; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* -; PLAIN: ret i64* %t -; PLAIN: } -; PLAIN: define i64* @fN() nounwind { -; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* -; PLAIN: ret i64* %t -; PLAIN: } -; PLAIN: define i64* @fO() nounwind { -; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* -; PLAIN: ret i64* %t -; PLAIN: } - -define i64* @fM() nounwind { - %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* - ret i64* %t -} -define i64* @fN() nounwind { - %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* - ret i64* %t -} -define i64* @fO() nounwind { - %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* - ret i64* %t -} - -; PLAIN: define i32 addrspace(1)* @fZ() nounwind { -; PLAIN: %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* -; PLAIN: ret i32 addrspace(1)* %t -; PLAIN: } -@ext2 = external addrspace(1) global [3 x { i32, i32 }] -define i32 addrspace(1)* @fZ() nounwind { - %t = bitcast i32 addrspace(1)* getelementptr inbounds (i32 addrspace(1)* getelementptr inbounds ([3 x { i32, i32 }] addrspace(1)* @ext2, i64 0, i64 1, i32 0), i64 1) to i32 addrspace(1)* - ret i32 addrspace(1)* %t -} diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll index fc1ced052a..376fa079d2 100644 --- a/test/Transforms/InstCombine/fcmp.ll +++ b/test/Transforms/InstCombine/fcmp.ll @@ -54,9 +54,8 @@ define i1 @test7(float %x) nounwind readnone ssp noredzone { %ext = fpext float %x to ppc_fp128 %cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000 ret i1 %cmp -; Can't convert ppc_fp128 ; CHECK: @test7 -; CHECK-NEXT: fpext float %x to ppc_fp128 +; CHECK-NEXT: fcmp ogt float %x, 0.000000e+00 } define float @test8(float %x) nounwind readnone optsize ssp { diff --git a/test/Transforms/InstCombine/stpcpy-1.ll b/test/Transforms/InstCombine/stpcpy-1.ll new file mode 100644 index 0000000000..8b6bb0e0d5 --- /dev/null +++ b/test/Transforms/InstCombine/stpcpy-1.ll @@ -0,0 +1,46 @@ +; Test that the stpcpy library call simplifier works correctly. +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. 
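stpcpy returns a pointer to the terminating NUL it wrote, so for a constant source the simplifier can materialize both the copy and the end pointer at compile time. A sketch of the expected output for a 6-byte constant string such as @hello below (illustrative only; the operand order follows the standard llvm.memcpy intrinsic of this era):

    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 6, i32 1, i1 false)
    ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 5) ; &dst[5], the copied NUL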
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 +@b = common global [32 x i8] zeroinitializer, align 1 + +declare i8* @stpcpy(i8*, i8*) + +define i8* @test_simplify1() { +; CHECK: @test_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + %ret = call i8* @stpcpy(i8* %dst, i8* %src) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32 +; CHECK-NEXT: getelementptr inbounds ([32 x i8]* @a, i32 0, i32 5) + ret i8* %ret +} + +define i8* @test_simplify2() { +; CHECK: @test_simplify2 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + + %ret = call i8* @stpcpy(i8* %dst, i8* %dst) +; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen +; CHECK-NEXT: getelementptr inbounds [32 x i8]* @a, i32 0, i32 [[LEN]] + ret i8* %ret +} + +define i8* @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [32 x i8]* @b, i32 0, i32 0 + + %ret = call i8* @stpcpy(i8* %dst, i8* %src) +; CHECK: call i8* @stpcpy + ret i8* %ret +} diff --git a/test/Transforms/InstCombine/stpcpy-2.ll b/test/Transforms/InstCombine/stpcpy-2.ll new file mode 100644 index 0000000000..2e92c0895e --- /dev/null +++ b/test/Transforms/InstCombine/stpcpy-2.ll @@ -0,0 +1,22 @@ +; Test that the stpcpy library call simplifier works correctly. +; RUN: opt < %s -instcombine -S | FileCheck %s +; +; This transformation requires the pointer size, as it assumes that size_t is +; the size of a pointer. +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 + +declare i16* @stpcpy(i8*, i8*) + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i16* @stpcpy(i8* %dst, i8* %src) +; CHECK: call i16* @stpcpy + ret void +} diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll new file mode 100644 index 0000000000..05603918c6 --- /dev/null +++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll @@ -0,0 +1,96 @@ +; Test lib call simplification of __stpcpy_chk calls with various values +; for src, dst, and slen. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i8] zeroinitializer, align 1 +@b = common global [60 x i8] zeroinitializer, align 1 +@.str = private constant [12 x i8] c"abcdefghijk\00" + +; Check cases where slen >= strlen (src). 
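When the bound slen is known to cover the whole copy, the fortified call can never overflow and is demoted: with a constant source it becomes a memcpy of strlen(src) + 1 bytes, and with an unknown source plus slen of -1 (object size unknown, so no check is possible) it falls back to plain stpcpy. Sketches of the two expected replacement forms (illustrative):

    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 12, i32 1, i1 false) ; constant 11-char source
    %ret = call i8* @stpcpy(i8* %dst, i8* %src)                                       ; unknown source, slen == -1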
+ +define void @test_simplify1() { +; CHECK: @test_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60) + ret void +} + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12) + ret void +} + +define void @test_simplify3() { +; CHECK: @test_simplify3 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret void +} + +; Check cases where there are no string constants. + +define void @test_simplify4() { +; CHECK: @test_simplify4 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @stpcpy + call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1) + ret void +} + +; Check case where the string length is not constant. + +define i8* @test_simplify5() { +; CHECK: @test_simplify5 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 + +; CHECK: @__memcpy_chk + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len) +; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11) + ret i8* %ret +} + +; Check case where the source and destination are the same. + +define i8* @test_simplify6() { +; CHECK: @test_simplify6 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + +; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen +; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]] + %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false) + %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len) + ret i8* %ret +} + +; Check case where slen < strlen (src). + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 + %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 + +; CHECK-NEXT: call i8* @__stpcpy_chk + call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8) + ret void +} + +declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly diff --git a/test/Transforms/InstCombine/stpcpy_chk-2.ll b/test/Transforms/InstCombine/stpcpy_chk-2.ll new file mode 100644 index 0000000000..46c2139276 --- /dev/null +++ b/test/Transforms/InstCombine/stpcpy_chk-2.ll @@ -0,0 +1,21 @@ +; Test that lib call simplification doesn't simplify __stpcpy_chk calls +; with the wrong prototype. 
+; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@a = common global [60 x i16] zeroinitializer, align 1 +@.str = private constant [8 x i8] c"abcdefg\00" + +define void @test_no_simplify() { +; CHECK: @test_no_simplify + %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0 + %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + +; CHECK-NEXT: call i16* @__strcpy_chk + call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8) + ret void +} + +declare i16* @__strcpy_chk(i16*, i8*, i32) diff --git a/test/Transforms/InstCombine/strlen-1.ll b/test/Transforms/InstCombine/strlen-1.ll new file mode 100644 index 0000000000..6d7464a4cc --- /dev/null +++ b/test/Transforms/InstCombine/strlen-1.ll @@ -0,0 +1,97 @@ +; Test that the strlen library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [6 x i8] c"hello\00" +@null = constant [1 x i8] zeroinitializer +@null_hello = constant [7 x i8] c"\00hello\00" +@nullstring = constant i8 0 +@a = common global [32 x i8] zeroinitializer, align 1 + +declare i32 @strlen(i8*) + +; Check strlen(string constant) -> integer constant. + +define i32 @test_simplify1() { +; CHECK: @test_simplify1 + %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 + %hello_l = call i32 @strlen(i8* %hello_p) + ret i32 %hello_l +; CHECK-NEXT: ret i32 5 +} + +define i32 @test_simplify2() { +; CHECK: @test_simplify2 + %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 + %null_l = call i32 @strlen(i8* %null_p) + ret i32 %null_l +; CHECK-NEXT: ret i32 0 +} + +define i32 @test_simplify3() { +; CHECK: @test_simplify3 + %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 + %null_hello_l = call i32 @strlen(i8* %null_hello_p) + ret i32 %null_hello_l +; CHECK-NEXT: ret i32 0 +} + +define i32 @test_simplify4() { +; CHECK: @test_simplify4 + %len = tail call i32 @strlen(i8* @nullstring) nounwind + ret i32 %len +; CHECK-NEXT: ret i32 0 +} + +; Check strlen(x) == 0 --> *x == 0. + +define i1 @test_simplify5() { +; CHECK: @test_simplify5 + %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 + %hello_l = call i32 @strlen(i8* %hello_p) + %eq_hello = icmp eq i32 %hello_l, 0 + ret i1 %eq_hello +; CHECK-NEXT: ret i1 false +} + +define i1 @test_simplify6() { +; CHECK: @test_simplify6 + %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 + %null_l = call i32 @strlen(i8* %null_p) + %eq_null = icmp eq i32 %null_l, 0 + ret i1 %eq_null +; CHECK-NEXT: ret i1 true +} + +; Check strlen(x) != 0 --> *x != 0. + +define i1 @test_simplify7() { +; CHECK: @test_simplify7 + %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 + %hello_l = call i32 @strlen(i8* %hello_p) + %ne_hello = icmp ne i32 %hello_l, 0 + ret i1 %ne_hello +; CHECK-NEXT: ret i1 true +} + +define i1 @test_simplify8() { +; CHECK: @test_simplify8 + %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 + %null_l = call i32 @strlen(i8* %null_p) + %ne_null = icmp ne i32 %null_l, 0 + ret i1 %ne_null +; CHECK-NEXT: ret i1 false +} + +; Check cases that shouldn't be simplified. 
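@a below is a writable global whose contents are unknown at compile time, so the length itself cannot be folded; only the comparison rewrites shown earlier would still apply. A hypothetical variant (not one of these tests) showing the strlen(x) == 0 --> *x == 0 form on such an argument:

    %a_p = getelementptr [32 x i8]* @a, i32 0, i32 0
    %a_l = call i32 @strlen(i8* %a_p)
    %eq = icmp eq i32 %a_l, 0
    ; expected to become roughly:
    ; %char0 = load i8* %a_p, align 1
    ; %eq = icmp eq i8 %char0, 0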
+ +define i32 @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %a_p = getelementptr [32 x i8]* @a, i32 0, i32 0 + %a_l = call i32 @strlen(i8* %a_p) +; CHECK-NEXT: %a_l = call i32 @strlen + ret i32 %a_l +; CHECK-NEXT: ret i32 %a_l +} diff --git a/test/Transforms/InstCombine/strlen-2.ll b/test/Transforms/InstCombine/strlen-2.ll new file mode 100644 index 0000000000..c4fd54c06d --- /dev/null +++ b/test/Transforms/InstCombine/strlen-2.ll @@ -0,0 +1,18 @@ +; Test that the strlen library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [6 x i8] c"hello\00" + +declare i32 @strlen(i8*, i32) + +define i32 @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 + %hello_l = call i32 @strlen(i8* %hello_p, i32 187) +; CHECK-NEXT: %hello_l = call i32 @strlen + ret i32 %hello_l +; CHECK-NEXT: ret i32 %hello_l +} diff --git a/test/Transforms/InstCombine/strncpy-1.ll b/test/Transforms/InstCombine/strncpy-1.ll new file mode 100644 index 0000000000..3ce2b9b5ee --- /dev/null +++ b/test/Transforms/InstCombine/strncpy-1.ll @@ -0,0 +1,95 @@ +; Test that the strncpy library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [6 x i8] c"hello\00" +@null = constant [1 x i8] zeroinitializer +@null_hello = constant [7 x i8] c"\00hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 +@b = common global [32 x i8] zeroinitializer, align 1 + +declare i8* @strncpy(i8*, i8*, i32) +declare i32 @puts(i8*) + +; Check a bunch of strncpy invocations together. + +define i32 @test_simplify1() { +; CHECK: @test_simplify1 +; CHECK-NOT: call i8* @strncpy +; CHECK: call i32 @puts + %target = alloca [1024 x i8] + %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 + store i8 0, i8* %arg1 + + %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 + %rslt1 = call i8* @strncpy(i8* %arg1, i8* %arg2, i32 6) + + %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 + %rslt2 = call i8* @strncpy(i8* %rslt1, i8* %arg3, i32 42) + + %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 + %rslt3 = call i8* @strncpy(i8* %rslt2, i8* %arg4, i32 42) + + call i32 @puts( i8* %rslt3 ) + ret i32 0 +} + +; Check strncpy(x, "", y) -> memset(x, '\0', y, 1). + +define void @test_simplify2() { +; CHECK: @test_simplify2 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [1 x i8]* @null, i32 0, i32 0 + + call i8* @strncpy(i8* %dst, i8* %src, i32 32) +; CHECK: call void @llvm.memset.p0i8.i32 + ret void +} + +; Check strncpy(x, y, 0) -> x. + +define i8* @test_simplify3() { +; CHECK: @test_simplify3 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + %ret = call i8* @strncpy(i8* %dst, i8* %src, i32 0) + ret i8* %ret +; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0) +} + +; Check strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]. 
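With both the source string and the count constant, and the count no larger than the source length plus its NUL, the zero-padding clause of strncpy is moot and the call reduces to a fixed-size memcpy. A sketch of the expected replacement for the 6-byte case below (illustrative):

    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 6, i32 1, i1 false)

test_no_simplify2 further down copies 8 bytes from the 6-byte @hello, which would require padding, so it is left alone.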
+ +define void @test_simplify4() { +; CHECK: @test_simplify4 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i8* @strncpy(i8* %dst, i8* %src, i32 6) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32 + ret void +} + +; Check cases that shouldn't be simplified. + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [32 x i8]* @b, i32 0, i32 0 + + call i8* @strncpy(i8* %dst, i8* %src, i32 32) +; CHECK: call i8* @strncpy + ret void +} + +define void @test_no_simplify2() { +; CHECK: @test_no_simplify2 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i8* @strncpy(i8* %dst, i8* %src, i32 8) +; CHECK: call i8* @strncpy + ret void +} diff --git a/test/Transforms/InstCombine/strncpy-2.ll b/test/Transforms/InstCombine/strncpy-2.ll new file mode 100644 index 0000000000..ac28ea6550 --- /dev/null +++ b/test/Transforms/InstCombine/strncpy-2.ll @@ -0,0 +1,22 @@ +; Test that the strncpy library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [6 x i8] c"hello\00" +@a = common global [32 x i8] zeroinitializer, align 1 + +declare i16* @strncpy(i8*, i8*, i32) + +; Check that 'strncpy' functions with the wrong prototype aren't simplified. + +define void @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %dst = getelementptr [32 x i8]* @a, i32 0, i32 0 + %src = getelementptr [6 x i8]* @hello, i32 0, i32 0 + + call i16* @strncpy(i8* %dst, i8* %src, i32 6) +; CHECK: call i16* @strncpy + ret void +} diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll index ae7e2fb5f1..aadff4268e 100644 --- a/test/Transforms/InstCombine/strncpy_chk-1.ll +++ b/test/Transforms/InstCombine/strncpy_chk-1.ll @@ -7,27 +7,27 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @a = common global [60 x i8] zeroinitializer, align 1 @b = common global [60 x i8] zeroinitializer, align 1 -@.str = private constant [8 x i8] c"abcdefg\00" +@.str = private constant [12 x i8] c"abcdefghijk\00" ; Check cases where dstlen >= len define void @test_simplify1() { ; CHECK: @test_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strncpy - call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60) ret void } define void @test_simplify2() { ; CHECK: @test_simplify2 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 -; CHECK-NEXT: call i8* @strncpy - call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 8) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32 + call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12) ret void } @@ -37,7 +37,7 @@ define void @test_simplify3() { %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0 ; CHECK-NEXT: call i8* @strncpy - call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 60) + call i8* @__strncpy_chk(i8* 
%dst, i8* %src, i32 12, i32 60) ret void } @@ -46,7 +46,7 @@ define void @test_simplify3() { define void @test_no_simplify1() { ; CHECK: @test_no_simplify1 %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0 + %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0 ; CHECK-NEXT: call i8* @__strncpy_chk call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4) diff --git a/test/Transforms/InstCombine/strpbrk-1.ll b/test/Transforms/InstCombine/strpbrk-1.ll new file mode 100644 index 0000000000..a5d0d86501 --- /dev/null +++ b/test/Transforms/InstCombine/strpbrk-1.ll @@ -0,0 +1,68 @@ +; Test that the strpbrk library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [12 x i8] c"hello world\00" +@w = constant [2 x i8] c"w\00" +@null = constant [1 x i8] zeroinitializer + +declare i8* @strpbrk(i8*, i8*) + +; Check strpbrk(s, "") -> NULL. + +define i8* @test_simplify1(i8* %str) { +; CHECK: @test_simplify1 + %pat = getelementptr [1 x i8]* @null, i32 0, i32 0 + + %ret = call i8* @strpbrk(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: ret i8* null +} + +; Check strpbrk("", s) -> NULL. + +define i8* @test_simplify2(i8* %pat) { +; CHECK: @test_simplify2 + %str = getelementptr [1 x i8]* @null, i32 0, i32 0 + + %ret = call i8* @strpbrk(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: ret i8* null +} + +; Check strpbrk(s1, s2), where s1 and s2 are constants. + +define i8* @test_simplify3() { +; CHECK: @test_simplify3 + %str = getelementptr [12 x i8]* @hello, i32 0, i32 0 + %pat = getelementptr [2 x i8]* @w, i32 0, i32 0 + + %ret = call i8* @strpbrk(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: ret i8* getelementptr inbounds ([12 x i8]* @hello, i32 0, i32 6) +} + +; Check strpbrk(s, "a") -> strchr(s, 'a'). + +define i8* @test_simplify4(i8* %str) { +; CHECK: @test_simplify4 + %pat = getelementptr [2 x i8]* @w, i32 0, i32 0 + + %ret = call i8* @strpbrk(i8* %str, i8* %pat) +; CHECK-NEXT: [[VAR:%[a-z]+]] = call i8* @strchr(i8* %str, i32 119) + ret i8* %ret +; CHECK-NEXT: ret i8* [[VAR]] +} + +; Check cases that shouldn't be simplified. + +define i8* @test_no_simplify1(i8* %str, i8* %pat) { +; CHECK: @test_no_simplify1 + + %ret = call i8* @strpbrk(i8* %str, i8* %pat) +; CHECK-NEXT: %ret = call i8* @strpbrk(i8* %str, i8* %pat) + ret i8* %ret +; CHECK-NEXT: ret i8* %ret +} diff --git a/test/Transforms/InstCombine/strpbrk-2.ll b/test/Transforms/InstCombine/strpbrk-2.ll new file mode 100644 index 0000000000..31ac2905df --- /dev/null +++ b/test/Transforms/InstCombine/strpbrk-2.ll @@ -0,0 +1,23 @@ +; Test that the strpbrk library call simplifier works correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +@hello = constant [12 x i8] c"hello world\00" +@w = constant [2 x i8] c"w\00" + +declare i16* @strpbrk(i8*, i8*) + +; Check that 'strpbrk' functions with the wrong prototype aren't simplified. 
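The simplifier only recognizes a call after checking the callee's type against the C prototype char *strpbrk(const char *s, const char *accept), which in IR terms is (sketch):

    declare i8* @strpbrk(i8*, i8*)

The i16* return type declared below fails that check, so the call must survive untouched.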
+ +define i16* @test_no_simplify1() { +; CHECK: @test_no_simplify1 + %str = getelementptr [12 x i8]* @hello, i32 0, i32 0 + %pat = getelementptr [2 x i8]* @w, i32 0, i32 0 + + %ret = call i16* @strpbrk(i8* %str, i8* %pat) +; CHECK-NEXT: %ret = call i16* @strpbrk + ret i16* %ret +; CHECK-NEXT: ret i16* %ret +} diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll new file mode 100644 index 0000000000..16c0c67970 --- /dev/null +++ b/test/Transforms/InstCombine/strto-1.ll @@ -0,0 +1,82 @@ +; Test that the strto* library call simplifiers work correctly. +; +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +declare i64 @strtol(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare i64 @strtol(i8*, i8**, i32) + +declare double @strtod(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare double @strtod(i8*, i8**, i32) + +declare float @strtof(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare float @strtof(i8*, i8**, i32) + +declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare i64 @strtoul(i8*, i8**, i32) + +declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare i64 @strtoll(i8*, i8**, i32) + +declare double @strtold(i8* %s, i8** %endptr) +; CHECK: declare double @strtold(i8*, i8**) + +declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base) +; CHECK: declare i64 @strtoull(i8*, i8**, i32) + +define void @test_simplify1(i8* %x, i8** %endptr) { +; CHECK: @test_simplify1 + call i64 @strtol(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call i64 @strtol(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_simplify2(i8* %x, i8** %endptr) { +; CHECK: @test_simplify2 + call double @strtod(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call double @strtod(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_simplify3(i8* %x, i8** %endptr) { +; CHECK: @test_simplify3 + call float @strtof(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call float @strtof(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_simplify4(i8* %x, i8** %endptr) { +; CHECK: @test_simplify4 + call i64 @strtoul(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call i64 @strtoul(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_simplify5(i8* %x, i8** %endptr) { +; CHECK: @test_simplify5 + call i64 @strtoll(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call i64 @strtoll(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_simplify6(i8* %x, i8** %endptr) { +; CHECK: @test_simplify6 + call double @strtold(i8* %x, i8** null) +; CHECK-NEXT: call double @strtold(i8* nocapture %x, i8** null) + ret void +} + +define void @test_simplify7(i8* %x, i8** %endptr) { +; CHECK: @test_simplify7 + call i64 @strtoull(i8* %x, i8** null, i32 10) +; CHECK-NEXT: call i64 @strtoull(i8* nocapture %x, i8** null, i32 10) + ret void +} + +define void @test_no_simplify1(i8* %x, i8** %endptr) { +; CHECK: @test_no_simplify1 + call i64 @strtol(i8* %x, i8** %endptr, i32 10) +; CHECK-NEXT: call i64 @strtol(i8* %x, i8** %endptr, i32 10) + ret void +} diff --git a/test/Transforms/InstCombine/vector_gep2.ll b/test/Transforms/InstCombine/vector_gep2.ll new file mode 100644 index 0000000000..20165b1100 --- /dev/null +++ b/test/Transforms/InstCombine/vector_gep2.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <2 x i8*> @testa(<2 x i8*> %a) { +; CHECK: @testa + %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 1> +; CHECK: getelementptr <2 x i8*> %a, <2 x i64> <i64 0, i64 1> + ret <2 x i8*> %g +} diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll index a85e834582..c07abb0c63 100644 --- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll +++ b/test/Transforms/Internalize/2008-05-09-AllButMain.ll @@ -1,27 +1,55 @@ -; No arguments means internalize all but main -; RUN: opt < %s -internalize -S | grep internal | count 4 +; No arguments means internalize everything +; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s + ; Internalize all but foo and j -; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | grep internal | count 3 -; Non existent files should be treated as if they were empty (so internalize all but main) -; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 4 -; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 3 +; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s + +; Non existent files should be treated as if they were empty (so internalize +; everything) +; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s + +; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null | FileCheck --check-prefix=LIST2 %s + ; -file and -list options should be merged, the .apifile contains foo and j -; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | grep internal | count 2 +; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s + +; NOARGS: @i = internal global +; LIST: @i = internal global +; EMPTYFILE: @i = internal global +; LIST2: @i = internal global +; MERGE: @i = internal global +@i = global i32 0 -@i = weak global i32 0 ; <i32*> [#uses=0] -@j = weak global i32 0 ; <i32*> [#uses=0] +; NOARGS: @j = internal global +; LIST: @j = global +; EMPTYFILE: @j = internal global +; LIST2: @j = internal global +; MERGE: @j = global +@j = global i32 0 -define void @main(...) { -entry: +; NOARGS: define internal void @main +; LIST: define internal void @main +; EMPTYFILE: define internal void @main +; LIST2: define internal void @main +; MERGE: define internal void @main +define void @main() { ret void } -define void @foo(...) { -entry: +; NOARGS: define internal void @foo +; LIST: define void @foo +; EMPTYFILE: define internal void @foo +; LIST2: define void @foo +; MERGE: define void @foo +define void @foo() { ret void } -define void @bar(...) 
{ -entry: +; NOARGS: define internal void @bar +; LIST: define internal void @bar +; EMPTYFILE: define internal void @bar +; LIST2: define void @bar +; MERGE: define void @bar +define void @bar() { ret void } diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll index 7b18a04e11..47cf3f0373 100644 --- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll +++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -internalize -S | grep internal | count 3 +; RUN: opt < %s -internalize -internalize-public-api-list main -S | grep internal | count 3 @A = global i32 0 @B = alias i32* @A diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll index b9c03544db..2fe87464c1 100644 --- a/test/Transforms/JumpThreading/crash.ll +++ b/test/Transforms/JumpThreading/crash.ll @@ -511,3 +511,56 @@ lbl_260: ; preds = %for.cond, %entry if.end: ; preds = %for.cond ret void } + +define void @PR14233(i1 %cmp, i1 %cmp2, i1 %cmp3, i1 %cmp4) { +entry: + br i1 %cmp, label %cond.true, label %cond.false + +cond.true: + br label %if.end + +cond.false: + br label %if.end + +if.end: + %A = phi i64 [ 0, %cond.true ], [ 1, %cond.false ] + br i1 %cmp2, label %bb, label %if.end2 + +bb: + br label %if.end2 + +if.end2: + %B = phi i64 [ ptrtoint (i8* ()* @PR14233.f1 to i64), %bb ], [ %A, %if.end ] + %cmp.ptr = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64) + br i1 %cmp.ptr, label %cond.true2, label %if.end3 + +cond.true2: + br i1 %cmp3, label %bb2, label %ur + +bb2: + br i1 %cmp4, label %if.end4, label %if.end3 + +if.end4: + unreachable + +if.end3: + %cmp.ptr2 = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64) + br i1 %cmp.ptr2, label %ur, label %if.then601 + +if.then601: + %C = icmp eq i64 %B, 0 + br i1 %C, label %bb3, label %bb4 + +bb3: + unreachable + +bb4: + unreachable + +ur: + unreachable +} + +declare i8* @PR14233.f1() + +declare i8* @PR14233.f2() diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll index 67c3951d74..fe8d445313 100644 --- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll +++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -licm | lli +; RUN: opt < %s -licm | lli %defaultjit define i32 @main() { entry: diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 46ab7e5542..06a5bd9086 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -383,4 +383,37 @@ for.end: ; preds = %for.inc } +define void @PR14241(i32* %s, i64 %size) { +; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught +; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy +; instead of a memmove. If we get the memmove transform back, this will catch +; regressions. +; +; CHECK: @PR14241 +entry: + %end.idx = add i64 %size, -1 + %end.ptr = getelementptr inbounds i32* %s, i64 %end.idx + br label %while.body +; CHECK-NOT: memcpy +; +; FIXME: When we regain the ability to form a memmove here, this test should be +; reversed and turned into a positive assertion. 
+; CHECK-NOT: memmove + +while.body: + %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ] + %src.ptr = getelementptr inbounds i32* %phi.ptr, i64 1 + %val = load i32* %src.ptr, align 4 +; CHECK: load + %dst.ptr = getelementptr inbounds i32* %phi.ptr, i64 0 + store i32 %val, i32* %dst.ptr, align 4 +; CHECK: store + %next.ptr = getelementptr inbounds i32* %phi.ptr, i64 1 + %cmp = icmp eq i32* %next.ptr, %end.ptr + br i1 %cmp, label %exit, label %while.body + +exit: + ret void +; CHECK: ret void +} diff --git a/test/Transforms/LoopIdiom/crash.ll b/test/Transforms/LoopIdiom/crash.ll new file mode 100644 index 0000000000..969adbcd76 --- /dev/null +++ b/test/Transforms/LoopIdiom/crash.ll @@ -0,0 +1,25 @@ +; RUN: opt -basicaa -loop-idiom -S < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; Don't crash inside DependenceAnalysis +; PR14219 +define void @test1(i64* %iwork, i64 %x) { +bb0: + %mul116 = mul nsw i64 %x, %x + %incdec.ptr6.sum175 = add i64 42, %x + %arrayidx135 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum175 + br label %bb1 +bb1: + %storemerge4226 = phi i64 [ 0, %bb0 ], [ %inc139, %bb1 ] + store i64 1, i64* %arrayidx135, align 8 + %incdec.ptr6.sum176 = add i64 %mul116, %storemerge4226 + %arrayidx137 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum176 + store i64 1, i64* %arrayidx137, align 8 + %inc139 = add nsw i64 %storemerge4226, 1 + %cmp131 = icmp sgt i64 %storemerge4226, 42 + br i1 %cmp131, label %bb2, label %bb1 +bb2: + ret void +} + diff --git a/test/Transforms/LoopIdiom/scev-invalidation.ll b/test/Transforms/LoopIdiom/scev-invalidation.ll new file mode 100644 index 0000000000..a244d9a280 --- /dev/null +++ b/test/Transforms/LoopIdiom/scev-invalidation.ll @@ -0,0 +1,74 @@ +; RUN: opt -S -indvars -loop-idiom < %s +; PR14214 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @quote_arg() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %backslashes.0 = phi i32 [ undef, %entry ], [ %backslashes.2, %for.inc ] + %p.0 = phi i8* [ undef, %entry ], [ %incdec.ptr3, %for.inc ] + %q.0 = phi i8* [ undef, %entry ], [ %q.2, %for.inc ] + %0 = load i8* %p.0, align 1 + switch i8 %0, label %while.cond.preheader [ + i8 0, label %for.cond4.preheader + i8 92, label %for.inc + ] + +while.cond.preheader: ; preds = %for.cond + %tobool210 = icmp eq i32 %backslashes.0, 0 + br i1 %tobool210, label %for.inc.loopexit, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %while.cond.preheader + %1 = add i32 %backslashes.0, -1 + %2 = zext i32 %1 to i64 + br label %while.body + +for.cond4.preheader: ; preds = %for.cond + %tobool57 = icmp eq i32 %backslashes.0, 0 + br i1 %tobool57, label %for.end10, label %for.body6.lr.ph + +for.body6.lr.ph: ; preds = %for.cond4.preheader + br label %for.body6 + +while.body: ; preds = %while.body.lr.ph, %while.body + %q.112 = phi i8* [ %q.0, %while.body.lr.ph ], [ %incdec.ptr, %while.body ] + %backslashes.111 = phi i32 [ %backslashes.0, %while.body.lr.ph ], [ %dec, %while.body ] + %incdec.ptr = getelementptr inbounds i8* %q.112, i64 1 + store i8 92, i8* %incdec.ptr, align 1 + %dec = add nsw i32 %backslashes.111, -1 + %tobool2 = icmp eq i32 %dec, 0 + br i1 %tobool2, label 
%while.cond.for.inc.loopexit_crit_edge, label %while.body + +while.cond.for.inc.loopexit_crit_edge: ; preds = %while.body + %scevgep.sum = add i64 %2, 1 + %scevgep13 = getelementptr i8* %q.0, i64 %scevgep.sum + br label %for.inc.loopexit + +for.inc.loopexit: ; preds = %while.cond.for.inc.loopexit_crit_edge, %while.cond.preheader + %q.1.lcssa = phi i8* [ %scevgep13, %while.cond.for.inc.loopexit_crit_edge ], [ %q.0, %while.cond.preheader ] + br label %for.inc + +for.inc: ; preds = %for.inc.loopexit, %for.cond + %backslashes.2 = phi i32 [ %backslashes.0, %for.cond ], [ 0, %for.inc.loopexit ] + %q.2 = phi i8* [ %q.0, %for.cond ], [ %q.1.lcssa, %for.inc.loopexit ] + %incdec.ptr3 = getelementptr inbounds i8* %p.0, i64 1 + br label %for.cond + +for.body6: ; preds = %for.body6.lr.ph, %for.body6 + %q.39 = phi i8* [ %q.0, %for.body6.lr.ph ], [ %incdec.ptr7, %for.body6 ] + %backslashes.38 = phi i32 [ %backslashes.0, %for.body6.lr.ph ], [ %dec9, %for.body6 ] + %incdec.ptr7 = getelementptr inbounds i8* %q.39, i64 1 + store i8 92, i8* %incdec.ptr7, align 1 + %dec9 = add nsw i32 %backslashes.38, -1 + %tobool5 = icmp eq i32 %dec9, 0 + br i1 %tobool5, label %for.cond4.for.end10_crit_edge, label %for.body6 + +for.cond4.for.end10_crit_edge: ; preds = %for.body6 + br label %for.end10 + +for.end10: ; preds = %for.cond4.for.end10_crit_edge, %for.cond4.preheader + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index 12e8e55983..2516e248bc 100644 --- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce +; RUN: opt < %s -loop-vectorize -dce -force-vector-width=4 ; Check that we don't crash. 
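-force-vector-width overrides the cost model's choice of vectorization factor, so the crash test above presumably pins the width to stay independent of the target-aware cost decisions that the new X86 tests below exercise. A typical invocation (sketch):

    opt < 2012-10-22-isconsec.ll -loop-vectorize -dce -force-vector-width=4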
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll new file mode 100644 index 0000000000..a2d176a534 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/avx1.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @read_mod_write_single_ptr +;CHECK: load <8 x float> +;CHECK: ret i32 +define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds float* %a, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = fmul float %3, 3.000000e+00 + store float %4, float* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + + +;CHECK: @read_mod_i64 +;CHECK: load <8 x i64> +;CHECK: ret i32 +define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds i64* %a, i64 %indvars.iv + %3 = load i64* %2, align 4 + %4 = mul i64 %3, 3 + store i64 %4, i64* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll new file mode 100644 index 0000000000..8582613617 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @conversion_cost1 +;CHECK: store <8 x i8> +;CHECK: ret +define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 3 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ] + %2 = trunc i64 %indvars.iv to i8 + %3 = getelementptr inbounds i8* %A, i64 %indvars.iv + store i8 %2, i8* %3, align 1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + +;CHECK: @conversion_cost2 +;CHECK: store <8 x float> +;CHECK: ret +define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph 
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = add nsw i64 %indvars.iv, 3 + %3 = trunc i64 %2 to i32 + %4 = sitofp i32 %3 to float + %5 = getelementptr inbounds float* %B, i64 %indvars.iv + store float %4, float* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll index 18abf2885e..40e660855b 100644 --- a/test/Transforms/LoopVectorize/cost-model.ll +++ b/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll new file mode 100644 index 0000000000..574c529834 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 +@a = common global [2048 x i32] zeroinitializer, align 16 + +; Select VF = 8; +;CHECK: @example1 +;CHECK: load <8 x i32> +;CHECK: add nsw <8 x i32> +;CHECK: store <8 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + + +; Select VF=4 because sext <8 x i16> to <8 x i32> is expensive. 
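The cost model prices each instruction at every candidate width, and the widening extension is what tips the balance away from VF=8 here; the checks that follow therefore expect 4-wide operations. A rough sketch of the widened loop body (illustrative; %sb.vec and %ia.vec stand in for the vector-typed pointers the vectorizer derives from %sb and %ia):

    %wide.load = load <4 x i16>* %sb.vec, align 2
    %wide.sext = sext <4 x i16> %wide.load to <4 x i32>
    store <4 x i32> %wide.sext, <4 x i32>* %ia.vec, align 4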
+;CHECK: @example10b +;CHECK: load <4 x i16> +;CHECK: sext <4 x i16> +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv + %3 = load i16* %2, align 2 + %4 = sext i16 %3 to i32 + %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv + store i32 %4, i32* %5, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %6, label %1 + +; <label>:6 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg new file mode 100644 index 0000000000..a8ad0f1a28 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll new file mode 100644 index 0000000000..2f22a76457 --- /dev/null +++ b/test/Transforms/LoopVectorize/flags.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @flags1 +;CHECK: load <4 x i32> +;CHECK: mul nsw <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret i32 +define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = mul nsw i32 %3, 3 + store i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} + + +;CHECK: @flags2 +;CHECK: load <4 x i32> +;CHECK: mul <4 x i32> +;CHECK: store <4 x i32> +;CHECK: ret i32 +define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 9 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] + %2 = getelementptr inbounds i32* %A, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = mul i32 %3, 3 + store i32 %4, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index d8942ac861..fce29d2404 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -21,7 +21,7 @@ target triple = 
"x86_64-apple-macosx10.8.0" ;CHECK: @example1 ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example1() nounwind uwtable ssp { @@ -227,6 +227,8 @@ define i32 @example9() nounwind uwtable readonly ssp { } ;CHECK: @example10a +;CHECK: load <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: load <4 x i16> ;CHECK: add <4 x i16> ;CHECK: store <4 x i16> diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll index 069b7ea031..71ea7689fc 100644 --- a/test/Transforms/LoopVectorize/increment.ll +++ b/test/Transforms/LoopVectorize/increment.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; } ;CHECK: @inc ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @inc(i32 %n) nounwind uwtable noinline ssp { diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll index 7727b0a2dc..1a6c15ed96 100644 --- a/test/Transforms/LoopVectorize/non-const-n.ll +++ b/test/Transforms/LoopVectorize/non-const-n.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: shl i32 ;CHECK: zext i32 ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> +;CHECK: add nsw <4 x i32> ;CHECK: store <4 x i32> ;CHECK: ret void define void @example1(i32 %n) nounwind uwtable ssp { diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index 746a08c3ea..c1848b35fc 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -66,7 +66,7 @@ define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocap ;CHECK: @reduction_mix ;CHECK: phi <4 x i32> ;CHECK: load <4 x i32> -;CHECK: mul <4 x i32> +;CHECK: mul nsw <4 x i32> ;CHECK: ret i32 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { %1 = icmp sgt i32 %n, 0 @@ -151,6 +151,7 @@ for.end: ; preds = %for.body, %entry ;CHECK: @reduction_and ;CHECK: and <4 x i32> +;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1> ;CHECK: ret i32 define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { entry: diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll new file mode 100644 index 0000000000..4a6e4b231d --- /dev/null +++ b/test/Transforms/LoopVectorize/small-loop.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: @example1 +;CHECK-NOT: load <4 x i32> +;CHECK: ret void +define void @example1() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + store 
i32 %6, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count. + br i1 %exitcond, label %8, label %1 + +; <label>:8 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll new file mode 100644 index 0000000000..5aa3bc034d --- /dev/null +++ b/test/Transforms/LoopVectorize/start-non-zero.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @start_at_nonzero +;CHECK: mul nuw <4 x i32> +;CHECK: ret i32 +define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp { +entry: + %cmp3 = icmp slt i32 %start, %end + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %0 = sext i32 %start to i64 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %1 = load i32* %arrayidx, align 4, !tbaa !0 + %mul = mul nuw i32 %1, 333 + store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %2 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %2, %end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret i32 4 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll new file mode 100644 index 0000000000..eb02760413 --- /dev/null +++ b/test/Transforms/LoopVectorize/write-only.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: @read_mod_write_single_ptr +;CHECK: load <4 x float> +;CHECK: ret i32 +define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp { + %1 = icmp sgt i32 %n, 0 + br i1 %1, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] + %2 = getelementptr inbounds float* %a, i64 %indvars.iv + %3 = load float* %2, align 4 + %4 = fmul float %3, 3.000000e+00 + store float %4, float* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph, %0 + ret i32 undef +} diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll index 92051c62a7..02e084bf11 100644 --- a/test/Transforms/SROA/vector-promotion.ll +++ b/test/Transforms/SROA/vector-promotion.ll @@ -205,3 +205,18 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { %res = load i64* %addr, align 4 ret i64 %res } + +define i32 @PR14212() { +; 
CHECK: @PR14212 +; This caused a crash when "splitting" the load of the i32 in order to promote +; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case. +entry: + %retval = alloca <3 x i8>, align 4 +; CHECK-NOT: alloca + + store <3 x i8> undef, <3 x i8>* %retval, align 4 + %cast = bitcast <3 x i8>* %retval to i32* + %load = load i32* %cast, align 4 + ret i32 %load +; CHECK: ret i32 +} diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg new file mode 100644 index 0000000000..786fee9e66 --- /dev/null +++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'Sparc' in targets: + config.unsupported = True + diff --git a/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll new file mode 100644 index 0000000000..9d1568557f --- /dev/null +++ b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -simplifycfg -S -mtriple=sparc-unknown-unknown | FileCheck %s + +; Check that switches are not turned into lookup tables, as this is not +; considered profitable on the target. + +define i32 @f(i32 %c) nounwind uwtable readnone { +entry: + switch i32 %c, label %sw.default [ + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 + i32 46, label %sw.bb4 + i32 47, label %sw.bb5 + i32 48, label %sw.bb6 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.default: br label %return +return: + %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ] + ret i32 %retval.0 + +; CHECK: @f +; CHECK-NOT: getelementptr +; CHECK: switch i32 %c +} diff --git a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll index aa48ec6481..d358350554 100644 --- a/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -15,12 +15,21 @@ target triple = "x86_64-unknown-linux-gnu" ; The table for @earlyreturncrash ; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] +; The table for @large. +; CHECK: @switch.table4 = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9, + +; The table for @cprop +; CHECK: @switch.table5 = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7] + +; The table for @unreachable +; CHECK: @switch.table6 = private unnamed_addr constant [5 x i32] [i32 0, i32 0, i32 0, i32 1, i32 -1] + ; A simple int-to-int selection switch. ; It is dense enough to be replaced by table lookup. ; The result is returned directly by a ret from an otherwise empty bb, ; so we return early, directly from the lookup bb.
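; (The lookup-table form checked below is a bounds check on the case index
; plus a load from a private @switch.table constant, replacing the original
; chain of compares and branches.)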
-define i32 @f(i32 %c) nounwind uwtable readnone { +define i32 @f(i32 %c) { entry: switch i32 %c, label %sw.default [ i32 42, label %return @@ -233,7 +242,7 @@ lor.end: } ; PR13946 -define i32 @overflow(i32 %type) nounwind { +define i32 @overflow(i32 %type) { entry: switch i32 %type, label %sw.default [ i32 -2147483648, label %sw.bb @@ -244,23 +253,12 @@ entry: i32 3, label %sw.bb3 ] -sw.bb: - br label %if.end - -sw.bb1: - br label %if.end - -sw.bb2: - br label %if.end - -sw.bb3: - br label %if.end - -sw.default: - br label %if.end - -if.else: - br label %if.end +sw.bb: br label %if.end +sw.bb1: br label %if.end +sw.bb2: br label %if.end +sw.bb3: br label %if.end +sw.default: br label %if.end +if.else: br label %if.end if.end: %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ] @@ -271,7 +269,7 @@ if.end: } ; PR13985 -define i1 @undef(i32 %tmp) uwtable ssp { +define i1 @undef(i32 %tmp) { bb: switch i32 %tmp, label %bb3 [ i32 0, label %bb1 @@ -280,16 +278,502 @@ bb: i32 8, label %bb2 ] -bb1: ; preds = %bb, %bb - br label %bb3 - -bb2: ; preds = %bb, %bb - br label %bb3 +bb1: br label %bb3 +bb2: br label %bb3 -bb3: ; preds = %bb2, %bb1, %bb +bb3: %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ] ret i1 %tmp4 ; CHECK: define i1 @undef ; CHECK: %switch.cast = trunc i32 %switch.tableidx to i9 ; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt } + +; Also handle large switches that would be rejected by +; isValueEqualityComparison() +; CHECK: large +; CHECK-NOT: switch i32 +define i32 @large(i32 %x) { +entry: + %cmp = icmp slt i32 %x, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %mul = mul i32 %x, -10 + br label %if.end + +if.end: + %x.addr.0 = phi i32 [ %mul, %if.then ], [ %x, %entry ] + switch i32 %x.addr.0, label %return [ + i32 199, label %sw.bb203 + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + i32 5, label %sw.bb5 + i32 6, label %sw.bb6 + i32 7, label %sw.bb7 + i32 8, label %sw.bb8 + i32 9, label %sw.bb9 + i32 10, label %sw.bb10 + i32 11, label %sw.bb11 + i32 12, label %sw.bb12 + i32 13, label %sw.bb13 + i32 14, label %sw.bb14 + i32 15, label %sw.bb15 + i32 16, label %sw.bb16 + i32 17, label %sw.bb17 + i32 18, label %sw.bb18 + i32 19, label %sw.bb19 + i32 20, label %sw.bb20 + i32 21, label %sw.bb21 + i32 22, label %sw.bb22 + i32 23, label %sw.bb23 + i32 24, label %sw.bb24 + i32 25, label %sw.bb25 + i32 26, label %sw.bb26 + i32 27, label %sw.bb27 + i32 28, label %sw.bb28 + i32 29, label %sw.bb29 + i32 30, label %sw.bb30 + i32 31, label %sw.bb31 + i32 32, label %sw.bb32 + i32 33, label %sw.bb33 + i32 34, label %sw.bb34 + i32 35, label %sw.bb35 + i32 36, label %sw.bb37 + i32 37, label %sw.bb38 + i32 38, label %sw.bb39 + i32 39, label %sw.bb40 + i32 40, label %sw.bb41 + i32 41, label %sw.bb42 + i32 42, label %sw.bb43 + i32 43, label %sw.bb44 + i32 44, label %sw.bb45 + i32 45, label %sw.bb47 + i32 46, label %sw.bb48 + i32 47, label %sw.bb49 + i32 48, label %sw.bb50 + i32 49, label %sw.bb51 + i32 50, label %sw.bb52 + i32 51, label %sw.bb53 + i32 52, label %sw.bb54 + i32 53, label %sw.bb55 + i32 54, label %sw.bb56 + i32 55, label %sw.bb58 + i32 56, label %sw.bb59 + i32 57, label %sw.bb60 + i32 58, label %sw.bb61 + i32 59, label %sw.bb62 + i32 60, label %sw.bb63 + i32 61, label %sw.bb64 + i32 62, label %sw.bb65 + i32 63, label %sw.bb66 + i32 64, label %sw.bb67 + i32 65, label %sw.bb68 + i32 66, label %sw.bb69 + i32 67, label %sw.bb70 + i32 68, label 
%sw.bb71 + i32 69, label %sw.bb72 + i32 70, label %sw.bb73 + i32 71, label %sw.bb74 + i32 72, label %sw.bb76 + i32 73, label %sw.bb77 + i32 74, label %sw.bb78 + i32 75, label %sw.bb79 + i32 76, label %sw.bb80 + i32 77, label %sw.bb81 + i32 78, label %sw.bb82 + i32 79, label %sw.bb83 + i32 80, label %sw.bb84 + i32 81, label %sw.bb85 + i32 82, label %sw.bb86 + i32 83, label %sw.bb87 + i32 84, label %sw.bb88 + i32 85, label %sw.bb89 + i32 86, label %sw.bb90 + i32 87, label %sw.bb91 + i32 88, label %sw.bb92 + i32 89, label %sw.bb93 + i32 90, label %sw.bb94 + i32 91, label %sw.bb95 + i32 92, label %sw.bb96 + i32 93, label %sw.bb97 + i32 94, label %sw.bb98 + i32 95, label %sw.bb99 + i32 96, label %sw.bb100 + i32 97, label %sw.bb101 + i32 98, label %sw.bb102 + i32 99, label %sw.bb103 + i32 100, label %sw.bb104 + i32 101, label %sw.bb105 + i32 102, label %sw.bb106 + i32 103, label %sw.bb107 + i32 104, label %sw.bb108 + i32 105, label %sw.bb109 + i32 106, label %sw.bb110 + i32 107, label %sw.bb111 + i32 108, label %sw.bb112 + i32 109, label %sw.bb113 + i32 110, label %sw.bb114 + i32 111, label %sw.bb115 + i32 112, label %sw.bb116 + i32 113, label %sw.bb117 + i32 114, label %sw.bb118 + i32 115, label %sw.bb119 + i32 116, label %sw.bb120 + i32 117, label %sw.bb121 + i32 118, label %sw.bb122 + i32 119, label %sw.bb123 + i32 120, label %sw.bb124 + i32 121, label %sw.bb125 + i32 122, label %sw.bb126 + i32 123, label %sw.bb127 + i32 124, label %sw.bb128 + i32 125, label %sw.bb129 + i32 126, label %sw.bb130 + i32 127, label %sw.bb131 + i32 128, label %sw.bb132 + i32 129, label %sw.bb133 + i32 130, label %sw.bb134 + i32 131, label %sw.bb135 + i32 132, label %sw.bb136 + i32 133, label %sw.bb137 + i32 134, label %sw.bb138 + i32 135, label %sw.bb139 + i32 136, label %sw.bb140 + i32 137, label %sw.bb141 + i32 138, label %sw.bb142 + i32 139, label %sw.bb143 + i32 140, label %sw.bb144 + i32 141, label %sw.bb145 + i32 142, label %sw.bb146 + i32 143, label %sw.bb147 + i32 144, label %sw.bb148 + i32 145, label %sw.bb149 + i32 146, label %sw.bb150 + i32 147, label %sw.bb151 + i32 148, label %sw.bb152 + i32 149, label %sw.bb153 + i32 150, label %sw.bb154 + i32 151, label %sw.bb155 + i32 152, label %sw.bb156 + i32 153, label %sw.bb157 + i32 154, label %sw.bb158 + i32 155, label %sw.bb159 + i32 156, label %sw.bb160 + i32 157, label %sw.bb161 + i32 158, label %sw.bb162 + i32 159, label %sw.bb163 + i32 160, label %sw.bb164 + i32 161, label %sw.bb165 + i32 162, label %sw.bb166 + i32 163, label %sw.bb167 + i32 164, label %sw.bb168 + i32 165, label %sw.bb169 + i32 166, label %sw.bb170 + i32 167, label %sw.bb171 + i32 168, label %sw.bb172 + i32 169, label %sw.bb173 + i32 170, label %sw.bb174 + i32 171, label %sw.bb175 + i32 172, label %sw.bb176 + i32 173, label %sw.bb177 + i32 174, label %sw.bb178 + i32 175, label %sw.bb179 + i32 176, label %sw.bb180 + i32 177, label %sw.bb181 + i32 178, label %sw.bb182 + i32 179, label %sw.bb183 + i32 180, label %sw.bb184 + i32 181, label %sw.bb185 + i32 182, label %sw.bb186 + i32 183, label %sw.bb187 + i32 184, label %sw.bb188 + i32 185, label %sw.bb189 + i32 186, label %sw.bb190 + i32 187, label %sw.bb191 + i32 188, label %sw.bb192 + i32 189, label %sw.bb193 + i32 190, label %sw.bb194 + i32 191, label %sw.bb195 + i32 192, label %sw.bb196 + i32 193, label %sw.bb197 + i32 194, label %sw.bb198 + i32 195, label %sw.bb199 + i32 196, label %sw.bb200 + i32 197, label %sw.bb201 + i32 198, label %sw.bb202 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: 
br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.bb7: br label %return +sw.bb8: br label %return +sw.bb9: br label %return +sw.bb10: br label %return +sw.bb11: br label %return +sw.bb12: br label %return +sw.bb13: br label %return +sw.bb14: br label %return +sw.bb15: br label %return +sw.bb16: br label %return +sw.bb17: br label %return +sw.bb18: br label %return +sw.bb19: br label %return +sw.bb20: br label %return +sw.bb21: br label %return +sw.bb22: br label %return +sw.bb23: br label %return +sw.bb24: br label %return +sw.bb25: br label %return +sw.bb26: br label %return +sw.bb27: br label %return +sw.bb28: br label %return +sw.bb29: br label %return +sw.bb30: br label %return +sw.bb31: br label %return +sw.bb32: br label %return +sw.bb33: br label %return +sw.bb34: br label %return +sw.bb35: br label %return +sw.bb37: br label %return +sw.bb38: br label %return +sw.bb39: br label %return +sw.bb40: br label %return +sw.bb41: br label %return +sw.bb42: br label %return +sw.bb43: br label %return +sw.bb44: br label %return +sw.bb45: br label %return +sw.bb47: br label %return +sw.bb48: br label %return +sw.bb49: br label %return +sw.bb50: br label %return +sw.bb51: br label %return +sw.bb52: br label %return +sw.bb53: br label %return +sw.bb54: br label %return +sw.bb55: br label %return +sw.bb56: br label %return +sw.bb58: br label %return +sw.bb59: br label %return +sw.bb60: br label %return +sw.bb61: br label %return +sw.bb62: br label %return +sw.bb63: br label %return +sw.bb64: br label %return +sw.bb65: br label %return +sw.bb66: br label %return +sw.bb67: br label %return +sw.bb68: br label %return +sw.bb69: br label %return +sw.bb70: br label %return +sw.bb71: br label %return +sw.bb72: br label %return +sw.bb73: br label %return +sw.bb74: br label %return +sw.bb76: br label %return +sw.bb77: br label %return +sw.bb78: br label %return +sw.bb79: br label %return +sw.bb80: br label %return +sw.bb81: br label %return +sw.bb82: br label %return +sw.bb83: br label %return +sw.bb84: br label %return +sw.bb85: br label %return +sw.bb86: br label %return +sw.bb87: br label %return +sw.bb88: br label %return +sw.bb89: br label %return +sw.bb90: br label %return +sw.bb91: br label %return +sw.bb92: br label %return +sw.bb93: br label %return +sw.bb94: br label %return +sw.bb95: br label %return +sw.bb96: br label %return +sw.bb97: br label %return +sw.bb98: br label %return +sw.bb99: br label %return +sw.bb100: br label %return +sw.bb101: br label %return +sw.bb102: br label %return +sw.bb103: br label %return +sw.bb104: br label %return +sw.bb105: br label %return +sw.bb106: br label %return +sw.bb107: br label %return +sw.bb108: br label %return +sw.bb109: br label %return +sw.bb110: br label %return +sw.bb111: br label %return +sw.bb112: br label %return +sw.bb113: br label %return +sw.bb114: br label %return +sw.bb115: br label %return +sw.bb116: br label %return +sw.bb117: br label %return +sw.bb118: br label %return +sw.bb119: br label %return +sw.bb120: br label %return +sw.bb121: br label %return +sw.bb122: br label %return +sw.bb123: br label %return +sw.bb124: br label %return +sw.bb125: br label %return +sw.bb126: br label %return +sw.bb127: br label %return +sw.bb128: br label %return +sw.bb129: br label %return +sw.bb130: br label %return +sw.bb131: br label %return +sw.bb132: br label %return +sw.bb133: br label %return +sw.bb134: br label %return +sw.bb135: br label %return +sw.bb136: br label %return +sw.bb137: br label %return +sw.bb138: br label 
%return +sw.bb139: br label %return +sw.bb140: br label %return +sw.bb141: br label %return +sw.bb142: br label %return +sw.bb143: br label %return +sw.bb144: br label %return +sw.bb145: br label %return +sw.bb146: br label %return +sw.bb147: br label %return +sw.bb148: br label %return +sw.bb149: br label %return +sw.bb150: br label %return +sw.bb151: br label %return +sw.bb152: br label %return +sw.bb153: br label %return +sw.bb154: br label %return +sw.bb155: br label %return +sw.bb156: br label %return +sw.bb157: br label %return +sw.bb158: br label %return +sw.bb159: br label %return +sw.bb160: br label %return +sw.bb161: br label %return +sw.bb162: br label %return +sw.bb163: br label %return +sw.bb164: br label %return +sw.bb165: br label %return +sw.bb166: br label %return +sw.bb167: br label %return +sw.bb168: br label %return +sw.bb169: br label %return +sw.bb170: br label %return +sw.bb171: br label %return +sw.bb172: br label %return +sw.bb173: br label %return +sw.bb174: br label %return +sw.bb175: br label %return +sw.bb176: br label %return +sw.bb177: br label %return +sw.bb178: br label %return +sw.bb179: br label %return +sw.bb180: br label %return +sw.bb181: br label %return +sw.bb182: br label %return +sw.bb183: br label %return +sw.bb184: br label %return +sw.bb185: br label %return +sw.bb186: br label %return +sw.bb187: br label %return +sw.bb188: br label %return +sw.bb189: br label %return +sw.bb190: br label %return +sw.bb191: br label %return +sw.bb192: br label %return +sw.bb193: br label %return +sw.bb194: br label %return +sw.bb195: br label %return +sw.bb196: br label %return +sw.bb197: br label %return +sw.bb198: br label %return +sw.bb199: br label %return +sw.bb200: br label %return +sw.bb201: br label %return +sw.bb202: br label %return +sw.bb203: br label %return + +return: + %retval.0 = phi i32 [ 39204, %sw.bb202 ], [ 38809, %sw.bb201 ], [ 38416, %sw.bb200 ], [ 38025, %sw.bb199 ], [ 37636, %sw.bb198 ], [ 37249, %sw.bb197 ], [ 36864, %sw.bb196 ], [ 36481, %sw.bb195 ], [ 36100, %sw.bb194 ], [ 35721, %sw.bb193 ], [ 35344, %sw.bb192 ], [ 34969, %sw.bb191 ], [ 34596, %sw.bb190 ], [ 34225, %sw.bb189 ], [ 33856, %sw.bb188 ], [ 33489, %sw.bb187 ], [ 33124, %sw.bb186 ], [ 32761, %sw.bb185 ], [ 32400, %sw.bb184 ], [ 32041, %sw.bb183 ], [ 31684, %sw.bb182 ], [ 31329, %sw.bb181 ], [ 30976, %sw.bb180 ], [ 30625, %sw.bb179 ], [ 30276, %sw.bb178 ], [ 29929, %sw.bb177 ], [ 29584, %sw.bb176 ], [ 29241, %sw.bb175 ], [ 28900, %sw.bb174 ], [ 28561, %sw.bb173 ], [ 28224, %sw.bb172 ], [ 27889, %sw.bb171 ], [ 27556, %sw.bb170 ], [ 27225, %sw.bb169 ], [ 26896, %sw.bb168 ], [ 26569, %sw.bb167 ], [ 26244, %sw.bb166 ], [ 25921, %sw.bb165 ], [ 25600, %sw.bb164 ], [ 25281, %sw.bb163 ], [ 24964, %sw.bb162 ], [ 24649, %sw.bb161 ], [ 24336, %sw.bb160 ], [ 24025, %sw.bb159 ], [ 23716, %sw.bb158 ], [ 23409, %sw.bb157 ], [ 23104, %sw.bb156 ], [ 22801, %sw.bb155 ], [ 22500, %sw.bb154 ], [ 22201, %sw.bb153 ], [ 21904, %sw.bb152 ], [ 21609, %sw.bb151 ], [ 21316, %sw.bb150 ], [ 21025, %sw.bb149 ], [ 20736, %sw.bb148 ], [ 20449, %sw.bb147 ], [ 20164, %sw.bb146 ], [ 19881, %sw.bb145 ], [ 19600, %sw.bb144 ], [ 19321, %sw.bb143 ], [ 19044, %sw.bb142 ], [ 18769, %sw.bb141 ], [ 18496, %sw.bb140 ], [ 18225, %sw.bb139 ], [ 17956, %sw.bb138 ], [ 17689, %sw.bb137 ], [ 17424, %sw.bb136 ], [ 17161, %sw.bb135 ], [ 16900, %sw.bb134 ], [ 16641, %sw.bb133 ], [ 16384, %sw.bb132 ], [ 16129, %sw.bb131 ], [ 15876, %sw.bb130 ], [ 15625, %sw.bb129 ], [ 15376, %sw.bb128 ], [ 15129, %sw.bb127 ], [ 14884, %sw.bb126 ], 
[ 14641, %sw.bb125 ], [ 14400, %sw.bb124 ], [ 14161, %sw.bb123 ], [ 13924, %sw.bb122 ], [ 13689, %sw.bb121 ], [ 13456, %sw.bb120 ], [ 13225, %sw.bb119 ], [ 12996, %sw.bb118 ], [ 12769, %sw.bb117 ], [ 12544, %sw.bb116 ], [ 12321, %sw.bb115 ], [ 12100, %sw.bb114 ], [ 11881, %sw.bb113 ], [ 11664, %sw.bb112 ], [ 11449, %sw.bb111 ], [ 11236, %sw.bb110 ], [ 11025, %sw.bb109 ], [ 10816, %sw.bb108 ], [ 10609, %sw.bb107 ], [ 10404, %sw.bb106 ], [ 10201, %sw.bb105 ], [ 10000, %sw.bb104 ], [ 9801, %sw.bb103 ], [ 9604, %sw.bb102 ], [ 9409, %sw.bb101 ], [ 9216, %sw.bb100 ], [ 9025, %sw.bb99 ], [ 8836, %sw.bb98 ], [ 8649, %sw.bb97 ], [ 8464, %sw.bb96 ], [ 8281, %sw.bb95 ], [ 8100, %sw.bb94 ], [ 7921, %sw.bb93 ], [ 7744, %sw.bb92 ], [ 7569, %sw.bb91 ], [ 7396, %sw.bb90 ], [ 7225, %sw.bb89 ], [ 7056, %sw.bb88 ], [ 6889, %sw.bb87 ], [ 6724, %sw.bb86 ], [ 6561, %sw.bb85 ], [ 6400, %sw.bb84 ], [ 6241, %sw.bb83 ], [ 6084, %sw.bb82 ], [ 5929, %sw.bb81 ], [ 5776, %sw.bb80 ], [ 5625, %sw.bb79 ], [ 5476, %sw.bb78 ], [ 5329, %sw.bb77 ], [ 5184, %sw.bb76 ], [ 5112, %sw.bb74 ], [ 4900, %sw.bb73 ], [ 4761, %sw.bb72 ], [ 4624, %sw.bb71 ], [ 4489, %sw.bb70 ], [ 4356, %sw.bb69 ], [ 4225, %sw.bb68 ], [ 4096, %sw.bb67 ], [ 3969, %sw.bb66 ], [ 3844, %sw.bb65 ], [ 3721, %sw.bb64 ], [ 3600, %sw.bb63 ], [ 3481, %sw.bb62 ], [ 3364, %sw.bb61 ], [ 3249, %sw.bb60 ], [ 3136, %sw.bb59 ], [ 3025, %sw.bb58 ], [ 2970, %sw.bb56 ], [ 2809, %sw.bb55 ], [ 2704, %sw.bb54 ], [ 2601, %sw.bb53 ], [ 2500, %sw.bb52 ], [ 2401, %sw.bb51 ], [ 2304, %sw.bb50 ], [ 2209, %sw.bb49 ], [ 2116, %sw.bb48 ], [ 2025, %sw.bb47 ], [ 1980, %sw.bb45 ], [ 1849, %sw.bb44 ], [ 1764, %sw.bb43 ], [ 1681, %sw.bb42 ], [ 1600, %sw.bb41 ], [ 1521, %sw.bb40 ], [ 1444, %sw.bb39 ], [ 1369, %sw.bb38 ], [ 1296, %sw.bb37 ], [ 1260, %sw.bb35 ], [ 1156, %sw.bb34 ], [ 1089, %sw.bb33 ], [ 1024, %sw.bb32 ], [ 961, %sw.bb31 ], [ 900, %sw.bb30 ], [ 841, %sw.bb29 ], [ 784, %sw.bb28 ], [ 729, %sw.bb27 ], [ 676, %sw.bb26 ], [ 625, %sw.bb25 ], [ 576, %sw.bb24 ], [ 529, %sw.bb23 ], [ 484, %sw.bb22 ], [ 441, %sw.bb21 ], [ 400, %sw.bb20 ], [ 361, %sw.bb19 ], [ 342, %sw.bb18 ], [ 289, %sw.bb17 ], [ 256, %sw.bb16 ], [ 225, %sw.bb15 ], [ 196, %sw.bb14 ], [ 169, %sw.bb13 ], [ 144, %sw.bb12 ], [ 121, %sw.bb11 ], [ 100, %sw.bb10 ], [ 81, %sw.bb9 ], [ 64, %sw.bb8 ], [ 49, %sw.bb7 ], [ 36, %sw.bb6 ], [ 25, %sw.bb5 ], [ 16, %sw.bb4 ], [ 9, %sw.bb3 ], [ 4, %sw.bb2 ], [ 1, %sw.bb1 ], [ 39601, %sw.bb203 ], [ 0, %if.end ] + ret i32 %retval.0 +} + +define i32 @cprop(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 1, label %return + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + i32 4, label %sw.bb2 + i32 5, label %sw.bb2 + i32 6, label %sw.bb3 + i32 7, label %sw.bb3 + ] + +sw.bb1: br label %return + +sw.bb2: + %and = and i32 %x, 1 + %tobool = icmp ne i32 %and, 0 + %cond = select i1 %tobool, i32 -123, i32 456 + %sub = sub nsw i32 %x, %cond + br label %return + +sw.bb3: + %trunc = trunc i32 %x to i8 + %sext = sext i8 %trunc to i32 + br label %return + +sw.default: + br label %return + +return: + %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ] + ret i32 %retval.0 + +; CHECK: @cprop +; CHECK: switch.lookup: +; CHECK: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table5, i32 0, i32 %switch.tableidx +} + +define i32 @unreachable(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb + i32 2, label %sw.bb + i32 3, label %sw.bb1 + i32 4, label %sw.bb2 + i32 5, label %sw.bb3 + i32 6, 
label %sw.bb3 + i32 7, label %sw.bb3 + i32 8, label %sw.bb3 + ] + +sw.bb: br label %return +sw.bb1: unreachable +sw.bb2: br label %return +sw.bb3: br label %return +sw.default: unreachable + +return: + %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %sw.bb ] + ret i32 %retval.0 + +; CHECK: @unreachable +; CHECK: switch.lookup: +; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx +} diff --git a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll deleted file mode 100644 index 2717228f7e..0000000000 --- a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -; Test that we add nocapture to the declaration, and to the second call only. - -; CHECK: declare float @strtol(i8*, i8** nocapture, i32) nounwind -declare float @strtol(i8* %s, i8** %endptr, i32 %base) - -define void @foo(i8* %x, i8** %endptr) { -; CHECK: call float @strtol(i8* %x, i8** %endptr, i32 10) - call float @strtol(i8* %x, i8** %endptr, i32 10) -; CHECK: %2 = call float @strtol(i8* nocapture %x, i8** null, i32 10) - call float @strtol(i8* %x, i8** null, i32 10) - ret void -} diff --git a/test/Transforms/SimplifyLibCalls/StpCpy.ll b/test/Transforms/SimplifyLibCalls/StpCpy.ll deleted file mode 100644 index 914b0955bc..0000000000 --- a/test/Transforms/SimplifyLibCalls/StpCpy.ll +++ /dev/null @@ -1,43 +0,0 @@ -; Test that the StpCpyOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -; This transformation requires the pointer size, as it assumes that size_t is -; the size of a pointer. -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" - -@hello = constant [6 x i8] c"hello\00" - -declare i8* @stpcpy(i8*, i8*) - -declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind - -declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly - -define i32 @t1() { -; CHECK: @t1 - %target = alloca [1024 x i8] - %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 - %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 - %rslt1 = call i8* @stpcpy( i8* %arg1, i8* %arg2 ) -; CHECK: @llvm.memcpy.p0i8.p0i8.i32 - ret i32 0 -} - -define i32 @t2() { -; CHECK: @t2 - %target = alloca [1024 x i8] - %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 - %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 - %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false) - %rslt1 = call i8* @__stpcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1) -; CHECK: @__memcpy_chk - ret i32 0 -} - -define i8* @t3(i8* %arg) { -; CHECK: @t3 - %stpcpy = tail call i8* @stpcpy(i8* %arg, i8* %arg) -; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen(i8* %arg) -; CHECK-NEXT: getelementptr inbounds i8* %arg, i32 [[LEN]] - ret i8* %stpcpy -} diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll deleted file mode 100644 index 4a20bbd2ce..0000000000 --- a/test/Transforms/SimplifyLibCalls/StrLen.ll +++ /dev/null @@ -1,62 +0,0 @@ -; Test that the StrCatOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep "call.*strlen" - -target datalayout = "e-p:32:32" -@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=3] -@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=3] -@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1] -@nullstring = constant i8 0 - -declare i32 
@strlen(i8*) - -define i32 @test1() { - %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1] - %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1] - ret i32 %hello_l -} - -define i32 @test2() { - %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1] - %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1] - ret i32 %null_l -} - -define i32 @test3() { - %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1] - %null_hello_l = call i32 @strlen( i8* %null_hello_p ) ; <i32> [#uses=1] - ret i32 %null_hello_l -} - -define i1 @test4() { - %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1] - %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1] - %eq_hello = icmp eq i32 %hello_l, 0 ; <i1> [#uses=1] - ret i1 %eq_hello -} - -define i1 @test5() { - %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1] - %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1] - %eq_null = icmp eq i32 %null_l, 0 ; <i1> [#uses=1] - ret i1 %eq_null -} - -define i1 @test6() { - %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1] - %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1] - %ne_hello = icmp ne i32 %hello_l, 0 ; <i1> [#uses=1] - ret i1 %ne_hello -} - -define i1 @test7() { - %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1] - %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1] - %ne_null = icmp ne i32 %null_l, 0 ; <i1> [#uses=1] - ret i1 %ne_null -} - -define i32 @test8() { - %len = tail call i32 @strlen(i8* @nullstring) nounwind - ret i32 %len -} diff --git a/test/Transforms/SimplifyLibCalls/StrNCpy.ll b/test/Transforms/SimplifyLibCalls/StrNCpy.ll deleted file mode 100644 index 4e47b31a6a..0000000000 --- a/test/Transforms/SimplifyLibCalls/StrNCpy.ll +++ /dev/null @@ -1,29 +0,0 @@ -; Test that the StrNCpyOptimizer works correctly -; RUN: opt < %s -simplify-libcalls -S | \ -; RUN: not grep "call.*strncpy" - -; This transformation requires the pointer size, as it assumes that size_t is -; the size of a pointer. 
-target datalayout = "-p:64:64:64" - -@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1] -@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1] -@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1] - -declare i8* @strncpy(i8*, i8*, i32) - -declare i32 @puts(i8*) - -define i32 @main() { - %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1] - %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2] - store i8 0, i8* %arg1 - %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1] - %rslt1 = call i8* @strncpy( i8* %arg1, i8* %arg2, i32 6 ) ; <i8*> [#uses=1] - %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1] - %rslt2 = call i8* @strncpy( i8* %rslt1, i8* %arg3, i32 42 ) ; <i8*> [#uses=1] - %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1] - %rslt3 = call i8* @strncpy( i8* %rslt2, i8* %arg4, i32 42 ) ; <i8*> [#uses=1] - call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0] - ret i32 0 -} diff --git a/test/Transforms/SimplifyLibCalls/StrPBrk.ll b/test/Transforms/SimplifyLibCalls/StrPBrk.ll deleted file mode 100644 index 29c3b7477b..0000000000 --- a/test/Transforms/SimplifyLibCalls/StrPBrk.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s - -target datalayout = "-p:64:64:64" - -@hello = constant [12 x i8] c"hello world\00" -@w = constant [2 x i8] c"w\00" -@null = constant [1 x i8] zeroinitializer - -declare i8* @strpbrk(i8*, i8*) - -define void @test(i8* %s1, i8* %s2) { - %hello_p = getelementptr [12 x i8]* @hello, i32 0, i32 0 - %w_p = getelementptr [2 x i8]* @w, i32 0, i32 0 - %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 - %test1 = call i8* @strpbrk(i8* %null_p, i8* %s2) - %test2 = call i8* @strpbrk(i8* %s1, i8* %null_p) -; CHECK-NOT: call i8* @strpbrk - %test3 = call i8* @strpbrk(i8* %s1, i8* %w_p) -; CHECK: call i8* @strchr(i8* %s1, i32 119) - %test4 = call i8* @strpbrk(i8* %hello_p, i8* %w_p) -; CHECK: getelementptr i8* %hello_p, i64 6 - %test5 = call i8* @strpbrk(i8* %s1, i8* %s2) -; CHECK: call i8* @strpbrk(i8* %s1, i8* %s2) - ret void -} diff --git a/test/lit.cfg b/test/lit.cfg index 7e6760e95a..79eaa23c8b 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -5,6 +5,7 @@ import os import sys import re +import platform # name: The name of this test suite. config.name = 'LLVM' @@ -148,7 +149,9 @@ config.substitutions.append( ('%mcjit_triple', mcjit_triple) ) # Provide a substitution for those tests that need to run the jit to obtain data # but simply want to use the currently considered most reliable jit for platform -if 'arm' in config.target_triple: +# FIXME: ppc32 is not ready for mcjit. +if 'arm' in config.target_triple \ + or 'powerpc64' in config.target_triple: defaultIsMCJIT = 'true' else: defaultIsMCJIT = 'false' @@ -241,9 +244,9 @@ else: if loadable_module: config.available_features.add('loadable_module') -# LTO -if config.lto_is_enabled == "1": - config.available_features.add('lto') +# LTO on OS X +if config.lto_is_enabled == "1" and platform.system() == "Darwin": + config.available_features.add('lto_on_osx') # llc knows whether it is compiled with -DNDEBUG.
import subprocess diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 1bfc2fe3e8..6918285622 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -36,6 +36,7 @@ add_subdirectory(bugpoint) add_subdirectory(bugpoint-passes) add_subdirectory(llvm-bcanalyzer) add_subdirectory(llvm-stress) +add_subdirectory(llvm-mcmarkup) if( NOT WIN32 ) add_subdirectory(lto) diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt index df4aa9ff4e..64164792a7 100644 --- a/tools/LLVMBuild.txt +++ b/tools/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt +subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup [component_0] type = Group diff --git a/tools/Makefile b/tools/Makefile index 901d3f35e4..17e8380677 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \ bugpoint llvm-bcanalyzer \ llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-rtdyld llvm-dwarfdump llvm-cov \ - llvm-size llvm-stress bc-wrap pso-stub + llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub # Let users override the set of tools to build from the command line. ifdef ONLY_TOOLS diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 40a15e9d31..1c3a01b1e8 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -505,9 +505,6 @@ static ld_plugin_status all_symbols_read_hook(void) { } } - // If we don't preserve any symbols, libLTO will assume that all symbols are - // needed. Keep all symbols unless we're producing a final executable. - bool anySymbolsPreserved = false; for (std::list<claimed_file>::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { if (I->syms.empty()) @@ -533,7 +530,6 @@ static ld_plugin_status all_symbols_read_hook(void) { } lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); // @LOCALMOD-END - anySymbolsPreserved = true; if (options::generate_api_file) api_file << I->syms[i].name << "\n"; @@ -560,12 +556,6 @@ static ld_plugin_status all_symbols_read_hook(void) { if (options::generate_api_file) api_file.close(); - if (!anySymbolsPreserved) { - // All of the IL is unnecessary! 
- lto_codegen_dispose(code_gen); - return LDPS_OK; - } - lto_codegen_set_pic_model(code_gen, output_type); lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF); if (!options::mcpu.empty()) diff --git a/tools/lli/RemoteTarget.cpp b/tools/lli/RemoteTarget.cpp index 66743225db..212bdfda1c 100644 --- a/tools/lli/RemoteTarget.cpp +++ b/tools/lli/RemoteTarget.cpp @@ -36,13 +36,13 @@ bool RemoteTarget::allocateSpace(size_t Size, unsigned Alignment, bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) { memcpy ((void*)Address, Data, Size); - sys::MemoryBlock Mem((void*)Address, Size); - sys::Memory::setExecutable(Mem, &ErrorMsg); return false; } bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) { memcpy ((void*)Address, Data, Size); + sys::MemoryBlock Mem((void*)Address, Size); + sys::Memory::setExecutable(Mem, &ErrorMsg); return false; } diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp index 0ee72387b8..22b48cf63f 100644 --- a/tools/lli/lli.cpp +++ b/tools/lli/lli.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/MathExtras.h" #include <cerrno> #ifdef __linux__ @@ -241,7 +242,7 @@ public: // the data cache but not to the instruction cache. virtual void invalidateInstructionCache(); - // The MCJITMemoryManager doesn't use the following functions, so we don't + // The RTDyldMemoryManager doesn't use the following functions, so we don't // need to implement them. virtual void setMemoryWritable() { llvm_unreachable("Unexpected call!"); @@ -303,9 +304,16 @@ uint8_t *LLIMCJITMemoryManager::allocateDataSection(uintptr_t Size, unsigned SectionID) { if (!Alignment) Alignment = 16; - uint8_t *Addr = (uint8_t*)calloc((Size + Alignment - 1)/Alignment, Alignment); - AllocatedDataMem.push_back(sys::MemoryBlock(Addr, Size)); - return Addr; + // Ensure that enough memory is requested to allow aligning. + size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment; + uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment); + + // Honour the alignment requirement. + uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment); + + // Store the original address from calloc so we can free it later. + AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment)); + return AlignedAddr; } uint8_t *LLIMCJITMemoryManager::allocateCodeSection(uintptr_t Size, @@ -464,7 +472,7 @@ void layoutRemoteTargetMemory(RemoteTarget *T, RecordingMemoryManager *JMM) { EE->mapSectionAddress(const_cast<void*>(Offsets[i].first), Addr); DEBUG(dbgs() << " Mapping local: " << Offsets[i].first - << " to remote: " << format("%#018x", Addr) << "\n"); + << " to remote: " << format("%p", Addr) << "\n"); } // Now load it all to the target.
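A minimal sketch (not part of the patch) of the over-allocate-and-round-up idiom that the allocateDataSection change above relies on; allocateAligned and BaseOut are illustrative names, and Alignment is assumed to be a power of two, as in the JIT memory managers:

#include <cstdint>
#include <cstdlib>

// Over-allocate by one Alignment-sized element so the base pointer can be
// rounded up to an aligned address without overrunning the buffer.
static uint8_t *allocateAligned(uintptr_t Size, uintptr_t Alignment,
                                void **BaseOut) {
  size_t NumElements = 1 + (Size + Alignment - 1) / Alignment;
  void *Base = calloc(NumElements, Alignment); // zero-filled, as in lli
  *BaseOut = Base;                             // original pointer, kept for free()
  uintptr_t Addr = (uintptr_t)Base;
  Addr = (Addr + Alignment - 1) & ~(Alignment - 1); // round up (power-of-two trick)
  return (uint8_t *)Addr;
}

RoundUpToAlignment from llvm/Support/MathExtras.h performs the same rounding step, which is why the hunk above adds that include.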
@@ -475,12 +483,12 @@ void layoutRemoteTargetMemory(RemoteTarget *T, RecordingMemoryManager *JMM) { T->loadCode(Addr, Offsets[i].first, Sizes[i]); DEBUG(dbgs() << " loading code: " << Offsets[i].first - << " to remote: " << format("%#018x", Addr) << "\n"); + << " to remote: " << format("%p", Addr) << "\n"); } else { T->loadData(Addr, Offsets[i].first, Sizes[i]); DEBUG(dbgs() << " loading data: " << Offsets[i].first - << " to remote: " << format("%#018x", Addr) << "\n"); + << " to remote: " << format("%p", Addr) << "\n"); } } @@ -685,7 +693,7 @@ int main(int argc, char **argv, char * const *envp) { uint64_t Entry = (uint64_t)EE->getPointerToFunction(EntryFn); DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at " - << format("%#18x", Entry) << "\n"); + << format("%p", Entry) << "\n"); if (Target.executeCode(Entry, Result)) errs() << "ERROR: " << Target.getErrorMsg() << "\n"; diff --git a/tools/llvm-ar/CMakeLists.txt b/tools/llvm-ar/CMakeLists.txt index c8b0b725d8..70eb7603fd 100644 --- a/tools/llvm-ar/CMakeLists.txt +++ b/tools/llvm-ar/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_LINK_COMPONENTS archive) -set(LLVM_REQUIRES_EH 1) add_llvm_tool(llvm-ar llvm-ar.cpp diff --git a/tools/llvm-ar/Makefile b/tools/llvm-ar/Makefile index 6ee6f34942..fafb14bc12 100644 --- a/tools/llvm-ar/Makefile +++ b/tools/llvm-ar/Makefile @@ -10,7 +10,6 @@ LEVEL := ../.. TOOLNAME := llvm-ar LINK_COMPONENTS := archive -REQUIRES_EH := 1 # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 7c53701f00..a8a5013a9a 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include <algorithm> +#include <cstdlib> #include <memory> #include <fstream> using namespace llvm; @@ -126,40 +127,57 @@ std::set<sys::Path> Paths; // The Archive object to which all the editing operations will be sent. Archive* TheArchive = 0; +// The name this program was invoked as. +static const char *program_name; + +// show_help - Show the error message, the help message and exit. +LLVM_ATTRIBUTE_NORETURN static void +show_help(const std::string &msg) { + errs() << program_name << ": " << msg << "\n\n"; + cl::PrintHelpMessage(); + if (TheArchive) + delete TheArchive; + std::exit(1); +} + +// fail - Show the error message and exit. +LLVM_ATTRIBUTE_NORETURN static void +fail(const std::string &msg) { + errs() << program_name << ": " << msg << "\n\n"; + if (TheArchive) + delete TheArchive; + std::exit(1); +} + // getRelPos - Extract the member filename from the command line for // the [relpos] argument associated with a, b, and i modifiers void getRelPos() { - if(RestOfArgs.size() > 0) { - RelPos = RestOfArgs[0]; - RestOfArgs.erase(RestOfArgs.begin()); - } - else - throw "Expected [relpos] for a, b, or i modifier"; + if(RestOfArgs.size() == 0) + show_help("Expected [relpos] for a, b, or i modifier"); + RelPos = RestOfArgs[0]; + RestOfArgs.erase(RestOfArgs.begin()); } // getCount - Extract the [count] argument associated with the N modifier // from the command line and check its value. 
void getCount() { - if(RestOfArgs.size() > 0) { - Count = atoi(RestOfArgs[0].c_str()); - RestOfArgs.erase(RestOfArgs.begin()); - } - else - throw "Expected [count] value with N modifier"; + if(RestOfArgs.size() == 0) + show_help("Expected [count] value with N modifier"); + + Count = atoi(RestOfArgs[0].c_str()); + RestOfArgs.erase(RestOfArgs.begin()); // Non-positive counts are not allowed if (Count < 1) - throw "Invalid [count] value (not a positive integer)"; + show_help("Invalid [count] value (not a positive integer)"); } // getArchive - Get the archive file name from the command line void getArchive() { - if(RestOfArgs.size() > 0) { - ArchiveName = RestOfArgs[0]; - RestOfArgs.erase(RestOfArgs.begin()); - } - else - throw "An archive name must be specified."; + if(RestOfArgs.size() == 0) + show_help("An archive name must be specified"); + ArchiveName = RestOfArgs[0]; + RestOfArgs.erase(RestOfArgs.begin()); } // getMembers - Copy over remaining items in RestOfArgs to our Members vector @@ -240,25 +258,27 @@ ArchiveOperation parseCommandLine() { // Perform various checks on the operation/modifier specification // to make sure we are dealing with a legal request. if (NumOperations == 0) - throw "You must specify at least one of the operations"; + show_help("You must specify at least one of the operations"); if (NumOperations > 1) - throw "Only one operation may be specified"; + show_help("Only one operation may be specified"); if (NumPositional > 1) - throw "You may only specify one of a, b, and i modifiers"; - if (AddAfter || AddBefore || InsertBefore) + show_help("You may only specify one of a, b, and i modifiers"); + if (AddAfter || AddBefore || InsertBefore) { if (Operation != Move && Operation != ReplaceOrInsert) - throw "The 'a', 'b' and 'i' modifiers can only be specified with " - "the 'm' or 'r' operations"; + show_help("The 'a', 'b' and 'i' modifiers can only be specified with " + "the 'm' or 'r' operations"); + } if (RecurseDirectories && Operation != ReplaceOrInsert) - throw "The 'R' modifiers is only applicabe to the 'r' operation"; + show_help("The 'R' modifier is only applicable to the 'r' operation"); if (OriginalDates && Operation != Extract) - throw "The 'o' modifier is only applicable to the 'x' operation"; + show_help("The 'o' modifier is only applicable to the 'x' operation"); if (TruncateNames && Operation!=QuickAppend && Operation!=ReplaceOrInsert) - throw "The 'f' modifier is only applicable to the 'q' and 'r' operations"; + show_help("The 'f' modifier is only applicable to the 'q' and 'r' " + "operations"); if (OnlyUpdate && Operation != ReplaceOrInsert) - throw "The 'u' modifier is only applicable to the 'r' operation"; + show_help("The 'u' modifier is only applicable to the 'r' operation"); if (Count > 1 && Members.size() > 1) - throw "Only one member name may be specified with the 'N' modifier"; + show_help("Only one member name may be specified with the 'N' modifier"); // Return the parsed operation to the caller return Operation; @@ -304,16 +324,16 @@ bool buildPaths(bool checkExistence, std::string* ErrMsg) { for (unsigned i = 0; i < Members.size(); i++) { sys::Path aPath; if (!aPath.set(Members[i])) - throw std::string("File member name invalid: ") + Members[i]; + fail(std::string("File member name invalid: ") + Members[i]); if (checkExistence) { bool Exists; if (sys::fs::exists(aPath.str(), Exists) || !Exists) - throw std::string("File does not exist: ") + Members[i]; + fail(std::string("File does not exist: ") + Members[i]); std::string Err;
sys::PathWithStatus PwS(aPath); const sys::FileStatus *si = PwS.getFileStatus(false, &Err); if (!si) - throw Err; + fail(Err); if (si->isDir) { std::set<sys::Path> dirpaths; if (recurseDirectories(aPath, dirpaths, ErrMsg)) @@ -683,6 +703,7 @@ doReplaceOrInsert(std::string* ErrMsg) { // main - main program for llvm-ar .. see comments in the code int main(int argc, char **argv) { + program_name = argv[0]; // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); @@ -698,77 +719,61 @@ int main(int argc, char **argv) { int exitCode = 0; - // Make sure we don't exit with "unhandled exception". - try { - // Do our own parsing of the command line because the CommandLine utility - // can't handle the grouped positional parameters without a dash. - ArchiveOperation Operation = parseCommandLine(); - - // Check the path name of the archive - sys::Path ArchivePath; - if (!ArchivePath.set(ArchiveName)) - throw std::string("Archive name invalid: ") + ArchiveName; - - // Create or open the archive object. - bool Exists; - if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) { - // Produce a warning if we should and we're creating the archive - if (!Create) - errs() << argv[0] << ": creating " << ArchivePath.str() << "\n"; - TheArchive = Archive::CreateEmpty(ArchivePath, Context); - TheArchive->writeToDisk(); - } else { - std::string Error; - TheArchive = Archive::OpenAndLoad(ArchivePath, Context, &Error); - if (TheArchive == 0) { - errs() << argv[0] << ": error loading '" << ArchivePath.str() << "': " - << Error << "!\n"; - return 1; - } - } + // Do our own parsing of the command line because the CommandLine utility + // can't handle the grouped positional parameters without a dash. + ArchiveOperation Operation = parseCommandLine(); - // Make sure we're not fooling ourselves. - assert(TheArchive && "Unable to instantiate the archive"); - - // Make sure we clean up the archive even on failure. - std::auto_ptr<Archive> AutoArchive(TheArchive); - - // Perform the operation - std::string ErrMsg; - bool haveError = false; - switch (Operation) { - case Print: haveError = doPrint(&ErrMsg); break; - case Delete: haveError = doDelete(&ErrMsg); break; - case Move: haveError = doMove(&ErrMsg); break; - case QuickAppend: haveError = doQuickAppend(&ErrMsg); break; - case ReplaceOrInsert: haveError = doReplaceOrInsert(&ErrMsg); break; - case DisplayTable: haveError = doDisplayTable(&ErrMsg); break; - case Extract: haveError = doExtract(&ErrMsg); break; - case NoOperation: - errs() << argv[0] << ": No operation was selected.\n"; - break; - } - if (haveError) { - errs() << argv[0] << ": " << ErrMsg << "\n"; + // Check the path name of the archive + sys::Path ArchivePath; + if (!ArchivePath.set(ArchiveName)) { + errs() << argv[0] << ": Archive name invalid: " << ArchiveName << "\n"; + return 1; + } + + // Create or open the archive object. 
+ bool Exists; + if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) { + // Produce a warning if we should and we're creating the archive + if (!Create) + errs() << argv[0] << ": creating " << ArchivePath.str() << "\n"; + TheArchive = Archive::CreateEmpty(ArchivePath, Context); + TheArchive->writeToDisk(); + } else { + std::string Error; + TheArchive = Archive::OpenAndLoad(ArchivePath, Context, &Error); + if (TheArchive == 0) { + errs() << argv[0] << ": error loading '" << ArchivePath.str() << "': " + << Error << "!\n"; return 1; } - } catch (const char*msg) { - // These errors are usage errors, thrown only by the various checks in the - // code above. - errs() << argv[0] << ": " << msg << "\n\n"; - cl::PrintHelpMessage(); - exitCode = 1; - } catch (const std::string& msg) { - // These errors are thrown by LLVM libraries (e.g. lib System) and represent - // a more serious error so we bump the exitCode and don't print the usage. - errs() << argv[0] << ": " << msg << "\n"; - exitCode = 2; - } catch (...) { - // This really shouldn't happen, but just in case .... - errs() << argv[0] << ": An unexpected unknown exception occurred.\n"; - exitCode = 3; } + // Make sure we're not fooling ourselves. + assert(TheArchive && "Unable to instantiate the archive"); + + // Perform the operation + std::string ErrMsg; + bool haveError = false; + switch (Operation) { + case Print: haveError = doPrint(&ErrMsg); break; + case Delete: haveError = doDelete(&ErrMsg); break; + case Move: haveError = doMove(&ErrMsg); break; + case QuickAppend: haveError = doQuickAppend(&ErrMsg); break; + case ReplaceOrInsert: haveError = doReplaceOrInsert(&ErrMsg); break; + case DisplayTable: haveError = doDisplayTable(&ErrMsg); break; + case Extract: haveError = doExtract(&ErrMsg); break; + case NoOperation: + errs() << argv[0] << ": No operation was selected.\n"; + break; + } + if (haveError) { + errs() << argv[0] << ": " << ErrMsg << "\n"; + return 1; + } + + delete TheArchive; + TheArchive = 0; + // Return result code back to operating system. return exitCode; } diff --git a/tools/llvm-as/CMakeLists.txt b/tools/llvm-as/CMakeLists.txt index eef4a13e29..d5620e7297 100644 --- a/tools/llvm-as/CMakeLists.txt +++ b/tools/llvm-as/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_LINK_COMPONENTS asmparser bitwriter) -set(LLVM_REQUIRES_EH 1) add_llvm_tool(llvm-as llvm-as.cpp diff --git a/tools/llvm-bcanalyzer/CMakeLists.txt b/tools/llvm-bcanalyzer/CMakeLists.txt index 732bc3296f..0151ea9b4f 100644 --- a/tools/llvm-bcanalyzer/CMakeLists.txt +++ b/tools/llvm-bcanalyzer/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_LINK_COMPONENTS bitreader) -set(LLVM_REQUIRES_EH 1) add_llvm_tool(llvm-bcanalyzer llvm-bcanalyzer.cpp diff --git a/tools/llvm-dis/CMakeLists.txt b/tools/llvm-dis/CMakeLists.txt index 3125f8a5c6..9f12ecb666 100644 --- a/tools/llvm-dis/CMakeLists.txt +++ b/tools/llvm-dis/CMakeLists.txt @@ -1,5 +1,4 @@ set(LLVM_LINK_COMPONENTS bitreader analysis) -set(LLVM_REQUIRES_EH 1) add_llvm_tool(llvm-dis llvm-dis.cpp diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 57b546e9a8..40fd51331e 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -72,6 +72,19 @@ ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a " "regular expression"), cl::ZeroOrMore, cl::value_desc("rfunction")); +// ExtractAlias - The alias to extract from the module. 
+static cl::list<std::string>
+ExtractAliases("alias", cl::desc("Specify alias to extract"),
+               cl::ZeroOrMore, cl::value_desc("alias"));
+
+
+// ExtractRegExpAliases - The aliases, matched via regular expression, to
+// extract from the module.
+static cl::list<std::string>
+ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a "
+                     "regular expression"),
+                     cl::ZeroOrMore, cl::value_desc("ralias"));
+
 // ExtractGlobals - The globals to extract from the module.
 static cl::list<std::string>
 ExtractGlobals("glob", cl::desc("Specify global to extract"),
@@ -110,6 +123,40 @@ int main(int argc, char **argv) {
   // Use SetVector to avoid duplicates.
   SetVector<GlobalValue *> GVs;
+  // Figure out which aliases we should extract.
+  for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) {
+    GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]);
+    if (!GA) {
+      errs() << argv[0] << ": program doesn't contain alias named '"
+             << ExtractAliases[i] << "'!\n";
+      return 1;
+    }
+    GVs.insert(GA);
+  }
+
+  // Extract aliases via regular expression matching.
+  for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) {
+    std::string Error;
+    Regex RegEx(ExtractRegExpAliases[i]);
+    if (!RegEx.isValid(Error)) {
+      errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' "
+        "invalid regex: " << Error << "\n";
+      return 1;
+    }
+    bool match = false;
+    for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end();
+         GA != E; GA++) {
+      if (RegEx.match(GA->getName())) {
+        GVs.insert(&*GA);
+        match = true;
+      }
+    }
+    if (!match) {
+      errs() << argv[0] << ": program doesn't contain an alias matching '"
+             << ExtractRegExpAliases[i] << "'!\n";
+      return 1;
+    }
+  }
+
   // Figure out which globals we should extract.
   for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) {
     GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]);
diff --git a/tools/llvm-mcmarkup/CMakeLists.txt b/tools/llvm-mcmarkup/CMakeLists.txt
new file mode 100644
index 0000000000..0a51e99f19
--- /dev/null
+++ b/tools/llvm-mcmarkup/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_llvm_tool(llvm-mcmarkup
+  llvm-mcmarkup.cpp
+  )
diff --git a/tools/llvm-mcmarkup/LLVMBuild.txt b/tools/llvm-mcmarkup/LLVMBuild.txt
new file mode 100644
index 0000000000..6423493a54
--- /dev/null
+++ b/tools/llvm-mcmarkup/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-mcmarkup/LLVMBuild.txt ----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-mcmarkup
+parent = Tools
+required_libraries = Support
diff --git a/tools/llvm-mcmarkup/Makefile b/tools/llvm-mcmarkup/Makefile
new file mode 100644
index 0000000000..5633a9c301
--- /dev/null
+++ b/tools/llvm-mcmarkup/Makefile
@@ -0,0 +1,17 @@
+##===- tools/llvm-mcmarkup/Makefile ------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := llvm-mcmarkup +LINK_COMPONENTS := support + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-mcmarkup/llvm-mcmarkup.cpp b/tools/llvm-mcmarkup/llvm-mcmarkup.cpp new file mode 100644 index 0000000000..888761f10f --- /dev/null +++ b/tools/llvm-mcmarkup/llvm-mcmarkup.cpp @@ -0,0 +1,225 @@ +//===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Example simple parser implementation for the MC assembly markup language. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +using namespace llvm; + +static cl::list<std::string> + InputFilenames(cl::Positional, cl::desc("<input files>"), + cl::ZeroOrMore); +static cl::opt<bool> +DumpTags("dump-tags", cl::desc("List all tags encountered in input")); + +static StringRef ToolName; + +/// Trivial lexer for the markup parser. Input is always handled a character +/// at a time. The lexer just encapsulates EOF and lookahead handling. +class MarkupLexer { + StringRef::const_iterator Start; + StringRef::const_iterator CurPtr; + StringRef::const_iterator End; +public: + MarkupLexer(StringRef Source) + : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {} + // When processing non-markup, input is consumed a character at a time. + bool isEOF() { return CurPtr == End; } + int getNextChar() { + if (CurPtr == End) return EOF; + return *CurPtr++; + } + int peekNextChar() { + if (CurPtr == End) return EOF; + return *CurPtr; + } + StringRef::const_iterator getPosition() const { return CurPtr; } +}; + +/// A markup tag is a name and a (usually empty) list of modifiers. +class MarkupTag { + StringRef Name; + StringRef Modifiers; + SMLoc StartLoc; +public: + MarkupTag(StringRef n, StringRef m, SMLoc Loc) + : Name(n), Modifiers(m), StartLoc(Loc) {} + StringRef getName() const { return Name; } + StringRef getModifiers() const { return Modifiers; } + SMLoc getLoc() const { return StartLoc; } +}; + +/// A simple parser implementation for creating MarkupTags from input text. +class MarkupParser { + MarkupLexer &Lex; + SourceMgr &SM; +public: + MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {} + /// Create a MarkupTag from the current position in the MarkupLexer. + /// The parseTag() method should be called when the lexer has processed + /// the opening '<' character. Input will be consumed up to and including + /// the ':' which terminates the tag open. + MarkupTag parseTag(); + /// Issue a diagnostic and terminate program execution. 
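+  /// Used for unrecoverable input errors such as an unterminated markup tag.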
+  void FatalError(SMLoc Loc, StringRef Msg);
+};
+
+void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
+  SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+  exit(1);
+}
+
+// Example handler for when a tag is recognized.
+static void processStartTag(MarkupTag &Tag) {
+  // If we're just printing the tags, do that; otherwise do some simple
+  // colorization.
+  if (DumpTags) {
+    outs() << Tag.getName();
+    if (Tag.getModifiers().size())
+      outs() << " " << Tag.getModifiers();
+    outs() << "\n";
+    return;
+  }
+
+  if (!outs().has_colors())
+    return;
+  // Color registers as red and immediates as cyan. Those don't have nested
+  // tags, so don't bother keeping a stack of colors to reset to.
+  if (Tag.getName() == "reg")
+    outs().changeColor(raw_ostream::RED);
+  else if (Tag.getName() == "imm")
+    outs().changeColor(raw_ostream::CYAN);
+}
+
+// Example handler for when the end of a tag is recognized.
+static void processEndTag(MarkupTag &Tag) {
+  // If we're printing the tags, there's nothing more to do here. Otherwise,
+  // set the color back to normal.
+  if (DumpTags)
+    return;
+  if (!outs().has_colors())
+    return;
+  // Just reset to basic white.
+  outs().changeColor(raw_ostream::WHITE, false);
+}
+
+MarkupTag MarkupParser::parseTag() {
+  // First off, extract the tag into its own StringRef so we can look at it
+  // outside of the context of consuming input.
+  StringRef::const_iterator Start = Lex.getPosition();
+  SMLoc Loc = SMLoc::getFromPointer(Start - 1);
+  while(Lex.getNextChar() != ':') {
+    // EOF is an error.
+    if (Lex.isEOF())
+      FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
+  }
+  StringRef RawTag(Start, Lex.getPosition() - Start - 1);
+  std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
+  return MarkupTag(SplitTag.first, SplitTag.second, Loc);
+}
+
+static void parseMCMarkup(StringRef Filename) {
+  OwningPtr<MemoryBuffer> BufferPtr;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, BufferPtr)) {
+    errs() << ToolName << ": " << ec.message() << '\n';
+    return;
+  }
+  MemoryBuffer *Buffer = BufferPtr.take();
+
+  SourceMgr SrcMgr;
+
+  // Tell SrcMgr about this buffer, which is what the parser will pick up.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  StringRef InputSource = Buffer->getBuffer();
+  MarkupLexer Lex(InputSource);
+  MarkupParser Parser(Lex, SrcMgr);
+
+  SmallVector<MarkupTag, 4> TagStack;
+
+  for (int CurChar = Lex.getNextChar();
+       CurChar != EOF;
+       CurChar = Lex.getNextChar()) {
+    switch (CurChar) {
+    case '<': {
+      // A "<<" is output as a literal '<' and does not start a markup tag.
+      if (Lex.peekNextChar() == '<') {
+        (void)Lex.getNextChar();
+        break;
+      }
+      // Parse the markup entry.
+      TagStack.push_back(Parser.parseTag());
+
+      // Do any special handling for the start of a tag.
+      processStartTag(TagStack.back());
+      continue;
+    }
+    case '>': {
+      SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
+      // A ">>" is output as a literal '>' and does not end a markup tag.
+      if (Lex.peekNextChar() == '>') {
+        (void)Lex.getNextChar();
+        break;
+      }
+      // Close out the innermost tag.
+      if (TagStack.empty())
+        Parser.FatalError(Loc, "'>' without matching '<'");
+
+      // Do any special handling for the end of a tag.
+      processEndTag(TagStack.back());
+
+      TagStack.pop_back();
+      continue;
+    }
+    default:
+      break;
+    }
+    // For anything else, just echo the character back out.
+    if (!DumpTags && CurChar != EOF)
+      outs() << (char)CurChar;
+  }
+
+  // If there are any unterminated markup tags, issue diagnostics for them.
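+  // For example, an input like "<imm:42" reaches EOF with the tag still
+  // open, so it is diagnosed here rather than silently dropped.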
+  while (!TagStack.empty()) {
+    MarkupTag &Tag = TagStack.back();
+    SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
+                        "unterminated markup tag");
+    TagStack.pop_back();
+  }
+}
+
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+  cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
+
+  ToolName = argv[0];
+
+  // If no input files specified, read from stdin.
+  if (InputFilenames.size() == 0)
+    InputFilenames.push_back("-");
+
+  std::for_each(InputFilenames.begin(), InputFilenames.end(),
+                parseMCMarkup);
+  return 0;
+}
diff --git a/tools/llvm-ranlib/CMakeLists.txt b/tools/llvm-ranlib/CMakeLists.txt
index 3116d2e4ff..2d7defee11 100644
--- a/tools/llvm-ranlib/CMakeLists.txt
+++ b/tools/llvm-ranlib/CMakeLists.txt
@@ -1,5 +1,4 @@
 set(LLVM_LINK_COMPONENTS archive)
-set(LLVM_REQUIRES_EH 1)
 
 add_llvm_tool(llvm-ranlib
   llvm-ranlib.cpp
diff --git a/tools/llvm-ranlib/Makefile b/tools/llvm-ranlib/Makefile
index 36195f4399..cca95013f4 100644
--- a/tools/llvm-ranlib/Makefile
+++ b/tools/llvm-ranlib/Makefile
@@ -10,7 +10,6 @@ LEVEL := ../..
 TOOLNAME := llvm-ranlib
 LINK_COMPONENTS := archive
-REQUIRES_EH := 1
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS := 1
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index 4006765a9c..d2f5f0fff9 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -61,41 +61,38 @@ int main(int argc, char **argv) {
   int exitCode = 0;
-  // Make sure we don't exit with "unhandled exception".
-  try {
-
-    // Check the path name of the archive
-    sys::Path ArchivePath;
-    if (!ArchivePath.set(ArchiveName))
-      throw std::string("Archive name invalid: ") + ArchiveName;
+  // Check the path name of the archive
+  sys::Path ArchivePath;
+  if (!ArchivePath.set(ArchiveName)) {
+    errs() << argv[0] << ": " << "Archive name invalid: " << ArchiveName <<
+      "\n";
+    return 1;
+  }
 
-    // Make sure it exists, we don't create empty archives
-    bool Exists;
-    if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists)
-      throw std::string("Archive file does not exist");
+  // Make sure it exists, we don't create empty archives
+  bool Exists;
+  if (llvm::sys::fs::exists(ArchivePath.str(), Exists) || !Exists) {
+    errs() << argv[0] << ": " << "Archive file does not exist: " <<
+      ArchivePath.str() << "\n";
+    return 1;
+  }
 
-    std::string err_msg;
-    std::auto_ptr<Archive>
-      AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
-    Archive* TheArchive = AutoArchive.get();
-    if (!TheArchive)
-      throw err_msg;
+  std::string err_msg;
+  std::auto_ptr<Archive>
+    AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
+  Archive* TheArchive = AutoArchive.get();
+  if (!TheArchive) {
+    errs() << argv[0] << ": " << err_msg << "\n";
+    return 1;
+  }
 
-    if (TheArchive->writeToDisk(true, false, &err_msg ))
-      throw err_msg;
+  if (TheArchive->writeToDisk(true, false, &err_msg )) {
+    errs() << argv[0] << ": " << err_msg << "\n";
+    return 1;
+  }
 
-    if (Verbose)
-      printSymbolTable(TheArchive);
+  if (Verbose)
+    printSymbolTable(TheArchive);
 
-  } catch (const char* msg) {
-    errs() << argv[0] << ": " << msg << "\n\n";
-    exitCode = 1;
-  } catch (const std::string& msg) {
-    errs() << argv[0] << ": " << msg << "\n";
-    exitCode = 2;
-  } catch (...)
{ - errs() << argv[0] << ": An unexpected unknown exception occurred.\n"; - exitCode = 3; - } return exitCode; } diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index c8d7177d86..48d5d83019 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -737,4 +737,40 @@ TEST(APFloatTest, convert) { EXPECT_EQ(4294967295.0, test.convertToDouble()); EXPECT_FALSE(losesInfo); } + +TEST(APFloatTest, PPCDoubleDouble) { + APFloat test(APFloat::PPCDoubleDouble, "1.0"); + EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); + + test.divide(APFloat(APFloat::PPCDoubleDouble, "3.0"), APFloat::rmNearestTiesToEven); + EXPECT_EQ(0x3fd5555555555555ull, test.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(0x3c75555555555556ull, test.bitcastToAPInt().getRawData()[1]); + + // LDBL_MAX + test = APFloat(APFloat::PPCDoubleDouble, "1.79769313486231580793728971405301e+308"); + EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(0x7c8ffffffffffffeull, test.bitcastToAPInt().getRawData()[1]); + + // LDBL_MIN + test = APFloat(APFloat::PPCDoubleDouble, "2.00416836000897277799610805135016e-292"); + EXPECT_EQ(0x0360000000000000ull, test.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); + + test = APFloat(APFloat::PPCDoubleDouble, "1.0"); + test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-105"), APFloat::rmNearestTiesToEven); + EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); + EXPECT_EQ(0x3960000000000000ull, test.bitcastToAPInt().getRawData()[1]); + + test = APFloat(APFloat::PPCDoubleDouble, "1.0"); + test.add(APFloat(APFloat::PPCDoubleDouble, "0x1p-106"), APFloat::rmNearestTiesToEven); + EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]); +#if 0 // XFAIL + // This is what we would expect with a true double-double implementation + EXPECT_EQ(0x3950000000000000ull, test.bitcastToAPInt().getRawData()[1]); +#else + // This is what we get with our 106-bit mantissa approximation + EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]); +#endif +} } diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp index 6933091949..59604dfbf5 100644 --- a/unittests/ExecutionEngine/JIT/JITTest.cpp +++ b/unittests/ExecutionEngine/JIT/JITTest.cpp @@ -224,8 +224,8 @@ class JITTest : public testing::Test { OwningPtr<ExecutionEngine> TheJIT; }; -// Tests on ARM disabled as we're running the old jit -#if !defined(__arm__) +// Tests on ARM and PowerPC disabled as we're running the old jit +#if !defined(__arm__) && !defined(__powerpc__) // Regression test for a bug. The JIT used to allocate globals inside the same // memory block used for the function, and when the function code was freed, @@ -295,14 +295,14 @@ TEST(JIT, GlobalInFunction) { EXPECT_EQ(3, *GPtr); } -#endif // !defined(__arm__) +#endif // !defined(__arm__) && !defined(__powerpc__) int PlusOne(int arg) { return arg + 1; } -// ARM tests disabled pending fix for PR10783. -#if !defined(__arm__) +// ARM and PowerPC tests disabled pending fix for PR10783. +#if !defined(__arm__) && !defined(__powerpc__) TEST_F(JITTest, FarCallToKnownFunction) { // x86-64 can only make direct calls to functions within 32 bits of // the current PC. 
To call anything farther away, we have to load @@ -480,7 +480,7 @@ TEST_F(JITTest, ModuleDeletion) { EXPECT_EQ(RJMM->startExceptionTableCalls.size(), NumTablesDeallocated); } -#endif // !defined(__arm__) +#endif // !defined(__arm__) && !defined(__powerpc__) // ARM, MIPS and PPC still emit stubs for calls since the target may be // too far away to call directly. This #if can probably be removed when @@ -526,8 +526,8 @@ TEST_F(JITTest, NoStubs) { } #endif // !ARM && !PPC -// Tests on ARM disabled as we're running the old jit -#if !defined(__arm__) +// Tests on ARM and PowerPC disabled as we're running the old jit +#if !defined(__arm__) && !defined(__powerpc__) TEST_F(JITTest, FunctionPointersOutliveTheirCreator) { TheJIT->DisableLazyCompilation(true); @@ -563,12 +563,13 @@ TEST_F(JITTest, FunctionPointersOutliveTheirCreator) { #endif } -#endif //!defined(__arm__) +#endif //!defined(__arm__) && !defined(__powerpc__) -// ARM does not have an implementation +// Tests on ARM and PowerPC disabled as we're running the old jit +// In addition, ARM does not have an implementation // of replaceMachineCodeForFunction(), so recompileAndRelinkFunction // doesn't work. -#if !defined(__arm__) +#if !defined(__arm__) && !defined(__powerpc__) TEST_F(JITTest, FunctionIsRecompiledAndRelinked) { Function *F = Function::Create(TypeBuilder<int(void), false>::get(Context), GlobalValue::ExternalLinkage, "test", M); @@ -599,7 +600,7 @@ TEST_F(JITTest, FunctionIsRecompiledAndRelinked) { EXPECT_EQ(2, OrigFPtr()) << "The old pointer's target should now jump to the new version"; } -#endif // !defined(__arm__) +#endif // !defined(__arm__) && !defined(__powerpc__) } // anonymous namespace // This variable is intentionally defined differently in the statically-compiled @@ -609,8 +610,8 @@ extern "C" int32_t JITTest_AvailableExternallyGlobal; int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42; namespace { -// Tests on ARM disabled as we're running the old jit -#if !defined(__arm__) +// Tests on ARM and PowerPC disabled as we're running the old jit +#if !defined(__arm__) && !defined(__powerpc__) TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) { TheJIT->DisableLazyCompilation(true); @@ -628,7 +629,7 @@ TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) { EXPECT_EQ(42, loader()) << "func should return 42 from the external global," << " not 7 from the IR version."; } -#endif //!defined(__arm__) +#endif //!defined(__arm__) && !defined(__powerpc__) } // anonymous namespace // This function is intentionally defined differently in the statically-compiled // program from the IR input to the JIT to assert that the JIT doesn't use its @@ -639,8 +640,8 @@ extern "C" int32_t JITTest_AvailableExternallyFunction() { } namespace { -// ARM tests disabled pending fix for PR10783. -#if !defined(__arm__) +// ARM and PowerPC tests disabled pending fix for PR10783. +#if !defined(__arm__) && !defined(__powerpc__) TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) { TheJIT->DisableLazyCompilation(true); LoadAssembly("define available_externally i32 " @@ -796,7 +797,7 @@ TEST(LazyLoadedJITTest, EagerCompiledRecursionThroughGhost) { (intptr_t)TheJIT->getPointerToFunction(recur1IR)); EXPECT_EQ(3, recur1(4)); } -#endif // !defined(__arm__) +#endif // !defined(__arm__) && !defined(__powerpc__) // This code is copied from JITEventListenerTest, but it only runs once for all // the tests in this directory. 
Everything seems fine, but that's strange diff --git a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp index 5b99d5b676..4a22e2f641 100644 --- a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp +++ b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp @@ -65,8 +65,8 @@ void createModule2(LLVMContext &Context2, Module *&M2, Function *&FooF2) { FooF2 = M2->getFunction("foo2"); } -// ARM tests disabled pending fix for PR10783. -#if !defined(__arm__) +// ARM and PowerPC tests disabled pending fix for PR10783. +#if !defined(__arm__) && !defined(__powerpc__) TEST(MultiJitTest, EagerMode) { LLVMContext Context1; @@ -176,6 +176,6 @@ TEST(MultiJitTest, JitPool) { #endif EXPECT_TRUE(sa == fa); } -#endif // !defined(__arm__) +#endif // !defined(__arm__) && !defined(__powerpc__) } // anonymous namespace diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp index 4644bf3c26..6b79a683bc 100644 --- a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp +++ b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp @@ -47,6 +47,7 @@ TEST_F(MCJITTest, global_variable) { GlobalValue *Global = insertGlobalInt32(M.get(), "test_global", initialValue); createJIT(M.take()); void *globalPtr = TheJIT->getPointerToGlobal(Global); + static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache(); EXPECT_TRUE(0 != globalPtr) << "Unable to get pointer to global value from JIT"; @@ -60,6 +61,7 @@ TEST_F(MCJITTest, add_function) { Function *F = insertAddFunction(M.get()); createJIT(M.take()); void *addPtr = TheJIT->getPointerToFunction(F); + static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache(); EXPECT_TRUE(0 != addPtr) << "Unable to get pointer to function from JIT"; @@ -76,6 +78,7 @@ TEST_F(MCJITTest, run_main) { Function *Main = insertMainFunction(M.get(), 6); createJIT(M.take()); void *vPtr = TheJIT->getPointerToFunction(Main); + static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache(); EXPECT_TRUE(0 != vPtr) << "Unable to get pointer to main() from JIT"; @@ -97,6 +100,7 @@ TEST_F(MCJITTest, return_global) { createJIT(M.take()); void *rgvPtr = TheJIT->getPointerToFunction(ReturnGlobal); + static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache(); EXPECT_TRUE(0 != rgvPtr); int32_t(*FuncPtr)(void) = (int32_t(*)(void))(intptr_t)rgvPtr; @@ -165,6 +169,7 @@ TEST_F(MCJITTest, multiple_functions) { createJIT(M.take()); void *vPtr = TheJIT->getPointerToFunction(Outer); + static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache(); EXPECT_TRUE(0 != vPtr) << "Unable to get pointer to outer function from JIT"; diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index 7f3cf2455e..d6baf3c9bb 100644 --- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -14,6 +14,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MathExtras.h" #include "SectionMemoryManager.h" @@ -34,9 +35,16 @@ uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size, unsigned SectionID) { if (!Alignment) Alignment = 16; - uint8_t *Addr = (uint8_t*)calloc((Size + Alignment - 1)/Alignment, Alignment); - AllocatedDataMem.push_back(sys::MemoryBlock(Addr, Size)); - return Addr; + // Ensure that enough memory is requested to allow aligning. 
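+  // Requesting one extra Alignment-sized element guarantees that rounding
+  // the returned pointer up to Alignment still leaves Size usable bytes.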
+ size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment; + uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment); + + // Honour the alignment requirement. + uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment); + + // Store the original address from calloc so we can free it later. + AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment)); + return AlignedAddr; } uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size, diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h index fb6c0348b1..e44217c906 100644 --- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h +++ b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h @@ -52,6 +52,7 @@ private: SmallVector<sys::MemoryBlock, 16> FreeCodeMem; public: + /// /// Functions below are not used by MCJIT, but must be implemented because /// they are declared as pure virtuals in the base class. diff --git a/unittests/VMCore/IRBuilderTest.cpp b/unittests/VMCore/IRBuilderTest.cpp index b6a3795fd0..9f26936df4 100644 --- a/unittests/VMCore/IRBuilderTest.cpp +++ b/unittests/VMCore/IRBuilderTest.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/BasicBlock.h" +#include "llvm/DataLayout.h" #include "llvm/Function.h" #include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" @@ -96,4 +97,15 @@ TEST_F(IRBuilderTest, CreateCondBr) { EXPECT_EQ(Weights, TI->getMetadata(LLVMContext::MD_prof)); } +TEST_F(IRBuilderTest, GetIntTy) { + IRBuilder<> Builder(BB); + IntegerType *Ty1 = Builder.getInt1Ty(); + EXPECT_EQ(Ty1, IntegerType::get(getGlobalContext(), 1)); + + DataLayout* DL = new DataLayout(M.get()); + IntegerType *IntPtrTy = Builder.getIntPtrTy(DL); + unsigned IntPtrBitSize = DL->getPointerSizeInBits(0); + EXPECT_EQ(IntPtrTy, IntegerType::get(getGlobalContext(), IntPtrBitSize)); +} + } diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp index 4cadc36f8f..a3b13ce92d 100644 --- a/unittests/VMCore/InstructionsTest.cpp +++ b/unittests/VMCore/InstructionsTest.cpp @@ -243,5 +243,42 @@ TEST(InstructionsTest, FPMathOperator) { delete I; } + +TEST(InstructionsTest, isEliminableCastPair) { + LLVMContext &C(getGlobalContext()); + + Type* Int32Ty = Type::getInt32Ty(C); + Type* Int64Ty = Type::getInt64Ty(C); + Type* Int64PtrTy = Type::getInt64PtrTy(C); + + // Source and destination pointers have same size -> bitcast. + EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt, + CastInst::IntToPtr, + Int64PtrTy, Int64Ty, Int64PtrTy, + Int32Ty, 0, Int32Ty), + CastInst::BitCast); + + // Source and destination pointers have different sizes -> fail. + EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::PtrToInt, + CastInst::IntToPtr, + Int64PtrTy, Int64Ty, Int64PtrTy, + Int32Ty, 0, Int64Ty), + 0U); + + // Middle pointer big enough -> bitcast. + EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr, + CastInst::PtrToInt, + Int64Ty, Int64PtrTy, Int64Ty, + 0, Int64Ty, 0), + CastInst::BitCast); + + // Middle pointer too small -> fail. 
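+  // (Here the intermediate pointer type is only 32 bits wide, so the i64
+  // value would be truncated and the cast pair must not be eliminated.)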
+ EXPECT_EQ(CastInst::isEliminableCastPair(CastInst::IntToPtr, + CastInst::PtrToInt, + Int64Ty, Int64PtrTy, Int64Ty, + 0, Int32Ty, 0), + 0U); +} + } // end anonymous namespace } // end namespace llvm diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index e76fa57066..ee83311c58 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -689,18 +689,18 @@ parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) { // Split via the '='. std::pair<StringRef, StringRef> Ops = S.split('='); if (Ops.second == "") - throw TGError(Loc, "missing '=' in two-operand alias constraint"); + PrintFatalError(Loc, "missing '=' in two-operand alias constraint"); // Trim whitespace and the leading '$' on the operand names. size_t start = Ops.first.find_first_of('$'); if (start == std::string::npos) - throw TGError(Loc, "expected '$' prefix on asm operand name"); + PrintFatalError(Loc, "expected '$' prefix on asm operand name"); Ops.first = Ops.first.slice(start + 1, std::string::npos); size_t end = Ops.first.find_last_of(" \t"); Ops.first = Ops.first.slice(0, end); // Now the second operand. start = Ops.second.find_first_of('$'); if (start == std::string::npos) - throw TGError(Loc, "expected '$' prefix on asm operand name"); + PrintFatalError(Loc, "expected '$' prefix on asm operand name"); Ops.second = Ops.second.slice(start + 1, std::string::npos); end = Ops.second.find_last_of(" \t"); Ops.first = Ops.first.slice(0, end); @@ -716,11 +716,11 @@ void MatchableInfo::formTwoOperandAlias(StringRef Constraint) { int SrcAsmOperand = findAsmOperandNamed(Ops.first); int DstAsmOperand = findAsmOperandNamed(Ops.second); if (SrcAsmOperand == -1) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "unknown source two-operand alias operand '" + Ops.first.str() + "'."); if (DstAsmOperand == -1) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "unknown destination two-operand alias operand '" + Ops.second.str() + "'."); @@ -852,15 +852,15 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) { // The first token of the instruction is the mnemonic, which must be a // simple string, not a $foo variable or a singleton register. if (AsmOperands.empty()) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "Instruction '" + TheDef->getName() + "' has no tokens"); Mnemonic = AsmOperands[0].Token; if (Mnemonic.empty()) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "Missing instruction mnemonic"); // FIXME : Check and raise an error if it is a register. if (Mnemonic[0] == '$') - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "Invalid instruction mnemonic '" + Mnemonic.str() + "'!"); // Remove the first operand, it is tracked in the mnemonic field. @@ -870,12 +870,12 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) { bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const { // Reject matchables with no .s string. if (AsmString.empty()) - throw TGError(TheDef->getLoc(), "instruction with empty asm string"); + PrintFatalError(TheDef->getLoc(), "instruction with empty asm string"); // Reject any matchables with a newline in them, they should be marked // isCodeGenOnly if they are pseudo instructions. 
if (AsmString.find('\n') != std::string::npos) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "multiline instruction is not valid for the asmparser, " "mark it isCodeGenOnly"); @@ -883,7 +883,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const { // has one line. if (!CommentDelimiter.empty() && StringRef(AsmString).find(CommentDelimiter) != StringRef::npos) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "asmstring for instruction has comment character in it, " "mark it isCodeGenOnly"); @@ -897,7 +897,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const { for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) { StringRef Tok = AsmOperands[i].Token; if (Tok[0] == '$' && Tok.find(':') != StringRef::npos) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "matchable with operand modifier '" + Tok.str() + "' not supported by asm matcher. Mark isCodeGenOnly!"); @@ -905,7 +905,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool Hack) const { // We reject aliases and ignore instructions for now. if (Tok[0] == '$' && !OperandNames.insert(Tok).second) { if (!Hack) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "ERROR: matchable with tied operand '" + Tok.str() + "' can never be matched!"); // FIXME: Should reject these. The ARM backend hits this with $lane in a @@ -1004,8 +1004,8 @@ AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) { // use it, else just fall back to the underlying register class. const RecordVal *R = Rec->getValue("ParserMatchClass"); if (R == 0 || R->getValue() == 0) - throw "Record `" + Rec->getName() + - "' does not have a ParserMatchClass!\n"; + PrintFatalError("Record `" + Rec->getName() + + "' does not have a ParserMatchClass!\n"); if (DefInit *DI= dyn_cast<DefInit>(R->getValue())) { Record *MatchClass = DI->getDef(); @@ -1016,28 +1016,28 @@ AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) { // No custom match class. Just use the register class. 
Record *ClassRec = Rec->getValueAsDef("RegClass"); if (!ClassRec) - throw TGError(Rec->getLoc(), "RegisterOperand `" + Rec->getName() + + PrintFatalError(Rec->getLoc(), "RegisterOperand `" + Rec->getName() + "' has no associated register class!\n"); if (ClassInfo *CI = RegisterClassClasses[ClassRec]) return CI; - throw TGError(Rec->getLoc(), "register class has no class info!"); + PrintFatalError(Rec->getLoc(), "register class has no class info!"); } if (Rec->isSubClassOf("RegisterClass")) { if (ClassInfo *CI = RegisterClassClasses[Rec]) return CI; - throw TGError(Rec->getLoc(), "register class has no class info!"); + PrintFatalError(Rec->getLoc(), "register class has no class info!"); } if (!Rec->isSubClassOf("Operand")) - throw TGError(Rec->getLoc(), "Operand `" + Rec->getName() + + PrintFatalError(Rec->getLoc(), "Operand `" + Rec->getName() + "' does not derive from class Operand!\n"); Record *MatchClass = Rec->getValueAsDef("ParserMatchClass"); if (ClassInfo *CI = AsmOperandClasses[MatchClass]) return CI; - throw TGError(Rec->getLoc(), "operand has no match class!"); + PrintFatalError(Rec->getLoc(), "operand has no match class!"); } void AsmMatcherInfo:: @@ -1287,7 +1287,7 @@ void AsmMatcherInfo::buildInfo() { continue; if (Pred->getName().empty()) - throw TGError(Pred->getLoc(), "Predicate has no name!"); + PrintFatalError(Pred->getLoc(), "Predicate has no name!"); unsigned FeatureNo = SubtargetFeatures.size(); SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo); @@ -1468,7 +1468,7 @@ void AsmMatcherInfo::buildInfo() { ClassInfo *FromClass = getTokenClass(Rec->getValueAsString("FromToken")); ClassInfo *ToClass = getTokenClass(Rec->getValueAsString("ToToken")); if (FromClass == ToClass) - throw TGError(Rec->getLoc(), + PrintFatalError(Rec->getLoc(), "error: Destination value identical to source value."); FromClass->SuperClasses.push_back(ToClass); } @@ -1490,7 +1490,7 @@ buildInstructionOperandReference(MatchableInfo *II, // Map this token to an operand. unsigned Idx; if (!Operands.hasOperandNamed(OperandName, Idx)) - throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" + + PrintFatalError(II->TheDef->getLoc(), "error: unable to find operand: '" + OperandName.str() + "'"); // If the instruction operand has multiple suboperands, but the parser @@ -1561,7 +1561,7 @@ void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II, return; } - throw TGError(II->TheDef->getLoc(), "error: unable to find operand: '" + + PrintFatalError(II->TheDef->getLoc(), "error: unable to find operand: '" + OperandName.str() + "'"); } @@ -1583,7 +1583,7 @@ void MatchableInfo::buildInstructionResultOperands() { // Find out what operand from the asmparser this MCInst operand comes from. int SrcOperand = findAsmOperandNamed(OpInfo.Name); if (OpInfo.Name.empty() || SrcOperand == -1) - throw TGError(TheDef->getLoc(), "Instruction '" + + PrintFatalError(TheDef->getLoc(), "Instruction '" + TheDef->getName() + "' has operand '" + OpInfo.Name + "' that doesn't appear in asm string!"); @@ -1635,7 +1635,7 @@ void MatchableInfo::buildAliasResultOperands() { StringRef Name = CGA.ResultOperands[AliasOpNo].getName(); int SrcOperand = findAsmOperand(Name, SubIdx); if (SrcOperand == -1) - throw TGError(TheDef->getLoc(), "Instruction '" + + PrintFatalError(TheDef->getLoc(), "Instruction '" + TheDef->getName() + "' has operand '" + OpName + "' that doesn't appear in asm string!"); unsigned NumOperands = (SubIdx == -1 ? 
OpInfo->MINumOperands : 1); @@ -2270,7 +2270,7 @@ static std::string GetAliasRequiredFeatures(Record *R, SubtargetFeatureInfo *F = Info.getSubtargetFeature(ReqFeatures[i]); if (F == 0) - throw TGError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() + + PrintFatalError(R->getLoc(), "Predicate '" + ReqFeatures[i]->getName() + "' is not marked as an AssemblerPredicate!"); if (NumFeatures) @@ -2333,14 +2333,14 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) { // We can't have two aliases from the same mnemonic with no predicate. PrintError(ToVec[AliasWithNoPredicate]->getLoc(), "two MnemonicAliases with the same 'from' mnemonic!"); - throw TGError(R->getLoc(), "this is the other MnemonicAlias."); + PrintFatalError(R->getLoc(), "this is the other MnemonicAlias."); } AliasWithNoPredicate = i; continue; } if (R->getValueAsString("ToMnemonic") == I->first) - throw TGError(R->getLoc(), "MnemonicAlias to the same string"); + PrintFatalError(R->getLoc(), "MnemonicAlias to the same string"); if (!MatchCode.empty()) MatchCode += "else "; diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 9e453e0f6d..a4114d9815 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -566,9 +566,9 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName, std::vector<std::string> AltNames = Reg.TheDef->getValueAsListOfStrings("AltNames"); if (AltNames.size() <= Idx) - throw TGError(Reg.TheDef->getLoc(), - (Twine("Register definition missing alt name for '") + - AltName + "'.").str()); + PrintFatalError(Reg.TheDef->getLoc(), + (Twine("Register definition missing alt name for '") + + AltName + "'.").str()); AsmName = AltNames[Idx]; } } diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp index 350a2ccfcc..fe1f756361 100644 --- a/utils/TableGen/AsmWriterInst.cpp +++ b/utils/TableGen/AsmWriterInst.cpp @@ -14,6 +14,7 @@ #include "AsmWriterInst.h" #include "CodeGenTarget.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" using namespace llvm; @@ -123,8 +124,8 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, != std::string::npos) { AddLiteralString(std::string(1, AsmString[DollarPos+1])); } else { - throw "Non-supported escaped character found in instruction '" + - CGI.TheDef->getName() + "'!"; + PrintFatalError("Non-supported escaped character found in instruction '" + + CGI.TheDef->getName() + "'!"); } LastEmitted = DollarPos+2; continue; @@ -162,15 +163,15 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, // brace. if (hasCurlyBraces) { if (VarEnd >= AsmString.size()) - throw "Reached end of string before terminating curly brace in '" - + CGI.TheDef->getName() + "'"; + PrintFatalError("Reached end of string before terminating curly brace in '" + + CGI.TheDef->getName() + "'"); // Look for a modifier string. 
if (AsmString[VarEnd] == ':') { ++VarEnd; if (VarEnd >= AsmString.size()) - throw "Reached end of string before terminating curly brace in '" - + CGI.TheDef->getName() + "'"; + PrintFatalError("Reached end of string before terminating curly brace in '" + + CGI.TheDef->getName() + "'"); unsigned ModifierStart = VarEnd; while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd])) @@ -178,17 +179,17 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, Modifier = std::string(AsmString.begin()+ModifierStart, AsmString.begin()+VarEnd); if (Modifier.empty()) - throw "Bad operand modifier name in '"+ CGI.TheDef->getName() + "'"; + PrintFatalError("Bad operand modifier name in '"+ CGI.TheDef->getName() + "'"); } if (AsmString[VarEnd] != '}') - throw "Variable name beginning with '{' did not end with '}' in '" - + CGI.TheDef->getName() + "'"; + PrintFatalError("Variable name beginning with '{' did not end with '}' in '" + + CGI.TheDef->getName() + "'"); ++VarEnd; } if (VarName.empty() && Modifier.empty()) - throw "Stray '$' in '" + CGI.TheDef->getName() + - "' asm string, maybe you want $$?"; + PrintFatalError("Stray '$' in '" + CGI.TheDef->getName() + + "' asm string, maybe you want $$?"); if (VarName.empty()) { // Just a modifier, pass this into PrintSpecial. diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 116fa57522..d0416c9081 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -1,4 +1,3 @@ -set(LLVM_REQUIRES_EH 1) set(LLVM_LINK_COMPONENTS Support) add_tablegen(llvm-tblgen LLVM diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp index dff97f4e83..94f3c6518c 100644 --- a/utils/TableGen/CallingConvEmitter.cpp +++ b/utils/TableGen/CallingConvEmitter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenTarget.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include <cassert> @@ -93,7 +94,7 @@ void CallingConvEmitter::EmitAction(Record *Action, O << Action->getValueAsString("Predicate"); } else { Action->dump(); - throw "Unknown CCPredicateAction!"; + PrintFatalError("Unknown CCPredicateAction!"); } O << ") {\n"; @@ -131,7 +132,7 @@ void CallingConvEmitter::EmitAction(Record *Action, ListInit *ShadowRegList = Action->getValueAsListInit("ShadowRegList"); if (ShadowRegList->getSize() >0 && ShadowRegList->getSize() != RegList->getSize()) - throw "Invalid length of list of shadowed registers"; + PrintFatalError("Invalid length of list of shadowed registers"); if (RegList->getSize() == 1) { O << IndentStr << "if (unsigned Reg = State.AllocateReg("; @@ -221,7 +222,7 @@ void CallingConvEmitter::EmitAction(Record *Action, O << IndentStr << IndentStr << "return false;\n"; } else { Action->dump(); - throw "Unknown CCAction!"; + PrintFatalError("Unknown CCAction!"); } } } diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 3b5511c056..d5b581b598 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -79,14 +79,19 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP, const std::vector<MVT::SimpleValueType> &LegalTypes = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); + if (TP.hasError()) + return false; + for (unsigned i = 0, e = LegalTypes.size(); i != e; ++i) if (Pred == 0 || Pred(LegalTypes[i])) TypeVec.push_back(LegalTypes[i]); // If we have nothing that 
matches the predicate, bail out. - if (TypeVec.empty()) + if (TypeVec.empty()) { TP.error("Type inference contradiction found, no " + std::string(PredicateName) + " types found"); + return false; + } // No need to sort with one element. if (TypeVec.size() == 1) return true; @@ -146,9 +151,9 @@ std::string EEVT::TypeSet::getName() const { /// MergeInTypeInfo - This merges in type information from the specified /// argument. If 'this' changes, it returns true. If the two types are -/// contradictory (e.g. merge f32 into i32) then this throws an exception. +/// contradictory (e.g. merge f32 into i32) then this flags an error. bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){ - if (InVT.isCompletelyUnknown() || *this == InVT) + if (InVT.isCompletelyUnknown() || *this == InVT || TP.hasError()) return false; if (isCompletelyUnknown()) { @@ -224,11 +229,13 @@ bool EEVT::TypeSet::MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP){ // FIXME: Really want an SMLoc here! TP.error("Type inference contradiction found, merging '" + InVT.getName() + "' into '" + InputSet.getName() + "'"); - return true; // unreachable + return false; } /// EnforceInteger - Remove all non-integer types from this set. bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) { + if (TP.hasError()) + return false; // If we know nothing, then get the full set. if (TypeVec.empty()) return FillWithPossibleTypes(TP, isInteger, "integer"); @@ -242,14 +249,18 @@ bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) { if (!isInteger(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); - if (TypeVec.empty()) + if (TypeVec.empty()) { TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be integer"); + return false; + } return true; } /// EnforceFloatingPoint - Remove all integer types from this set. bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) { + if (TP.hasError()) + return false; // If we know nothing, then get the full set. if (TypeVec.empty()) return FillWithPossibleTypes(TP, isFloatingPoint, "floating point"); @@ -264,14 +275,19 @@ bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) { if (!isFloatingPoint(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); - if (TypeVec.empty()) + if (TypeVec.empty()) { TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be floating point"); + return false; + } return true; } /// EnforceScalar - Remove all vector types from this. bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) { + if (TP.hasError()) + return false; + // If we know nothing, then get the full set. if (TypeVec.empty()) return FillWithPossibleTypes(TP, isScalar, "scalar"); @@ -286,14 +302,19 @@ bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) { if (!isScalar(TypeVec[i])) TypeVec.erase(TypeVec.begin()+i--); - if (TypeVec.empty()) + if (TypeVec.empty()) { TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be scalar"); + return false; + } return true; } /// EnforceVector - Remove all vector types from this. bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { + if (TP.hasError()) + return false; + // If we know nothing, then get the full set. 
if (TypeVec.empty()) return FillWithPossibleTypes(TP, isVector, "vector"); @@ -308,9 +329,11 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { MadeChange = true; } - if (TypeVec.empty()) + if (TypeVec.empty()) { TP.error("Type inference contradiction found, '" + InputSet.getName() + "' needs to be a vector"); + return false; + } return MadeChange; } @@ -319,6 +342,9 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) { /// EnforceSmallerThan - 'this' must be a smaller VT than Other. Update /// this an other based on this information. bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { + if (TP.hasError()) + return false; + // Both operands must be integer or FP, but we don't care which. bool MadeChange = false; @@ -365,19 +391,22 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { if (hasVectorTypes() && Other.hasVectorTypes()) { if (Type.getSizeInBits() >= OtherType.getSizeInBits()) if (Type.getVectorElementType().getSizeInBits() - >= OtherType.getVectorElementType().getSizeInBits()) + >= OtherType.getVectorElementType().getSizeInBits()) { TP.error("Type inference contradiction found, '" + getName() + "' element type not smaller than '" + Other.getName() +"'!"); + return false; + } } else // For scalar types, the bitsize of this type must be larger // than that of the other. - if (Type.getSizeInBits() >= OtherType.getSizeInBits()) + if (Type.getSizeInBits() >= OtherType.getSizeInBits()) { TP.error("Type inference contradiction found, '" + getName() + "' is not smaller than '" + Other.getName() +"'!"); - + return false; + } } @@ -437,9 +466,11 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { // If this is the only type in the large set, the constraint can never be // satisfied. if ((Other.hasIntegerTypes() && OtherIntSize == 0) - || (Other.hasFloatingPointTypes() && OtherFPSize == 0)) + || (Other.hasFloatingPointTypes() && OtherFPSize == 0)) { TP.error("Type inference contradiction found, '" + Other.getName() + "' has nothing larger than '" + getName() +"'!"); + return false; + } // Okay, find the largest type in the Other set and remove it from the // current set. @@ -493,9 +524,11 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { // If this is the only type in the small set, the constraint can never be // satisfied. if ((hasIntegerTypes() && IntSize == 0) - || (hasFloatingPointTypes() && FPSize == 0)) + || (hasFloatingPointTypes() && FPSize == 0)) { TP.error("Type inference contradiction found, '" + getName() + "' has nothing smaller than '" + Other.getName()+"'!"); + return false; + } return MadeChange; } @@ -504,6 +537,9 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) { /// whose element is specified by VTOperand. bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand, TreePattern &TP) { + if (TP.hasError()) + return false; + // "This" must be a vector and "VTOperand" must be a scalar. bool MadeChange = false; MadeChange |= EnforceVector(TP); @@ -535,9 +571,11 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(EEVT::TypeSet &VTOperand, } } - if (TypeVec.empty()) // FIXME: Really want an SMLoc here! + if (TypeVec.empty()) { // FIXME: Really want an SMLoc here! 
TP.error("Type inference contradiction found, forcing '" + InputSet.getName() + "' to have a vector element"); + return false; + } return MadeChange; } @@ -769,7 +807,7 @@ SDTypeConstraint::SDTypeConstraint(Record *R) { ConstraintType = SDTCisVT; x.SDTCisVT_Info.VT = getValueType(R->getValueAsDef("VT")); if (x.SDTCisVT_Info.VT == MVT::isVoid) - throw TGError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT"); + PrintFatalError(R->getLoc(), "Cannot use 'Void' as type to SDTCisVT"); } else if (R->isSubClassOf("SDTCisPtrTy")) { ConstraintType = SDTCisPtrTy; @@ -829,11 +867,13 @@ static TreePatternNode *getOperandNum(unsigned OpNo, TreePatternNode *N, /// ApplyTypeConstraint - Given a node in a pattern, apply this type /// constraint to the nodes operands. This returns true if it makes a -/// change, false otherwise. If a type contradiction is found, throw an -/// exception. +/// change, false otherwise. If a type contradiction is found, flag an error. bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo, TreePattern &TP) const { + if (TP.hasError()) + return false; + unsigned ResNo = 0; // The result number being referenced. TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo); @@ -866,8 +906,10 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, if (!NodeToApply->isLeaf() || !isa<DefInit>(NodeToApply->getLeafValue()) || !static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef() - ->isSubClassOf("ValueType")) + ->isSubClassOf("ValueType")) { TP.error(N->getOperator()->getName() + " expects a VT operand!"); + return false; + } MVT::SimpleValueType VT = getValueType(static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()); @@ -1176,7 +1218,11 @@ SubstituteFormalArguments(std::map<std::string, TreePatternNode*> &ArgMap) { /// fragments, inline them into place, giving us a pattern without any /// PatFrag references. TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { - if (isLeaf()) return this; // nothing to do. + if (TP.hasError()) + return 0; + + if (isLeaf()) + return this; // nothing to do. Record *Op = getOperator(); if (!Op->isSubClassOf("PatFrag")) { @@ -1199,9 +1245,11 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { TreePattern *Frag = TP.getDAGPatterns().getPatternFragment(Op); // Verify that we are passing the right number of operands. - if (Frag->getNumArgs() != Children.size()) + if (Frag->getNumArgs() != Children.size()) { TP.error("'" + Op->getName() + "' fragment requires " + utostr(Frag->getNumArgs()) + " operands!"); + return 0; + } TreePatternNode *FragTree = Frag->getOnlyTree()->clone(); @@ -1375,9 +1423,11 @@ TreePatternNode::isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const { /// ApplyTypeConstraints - Apply all of the type constraints relevant to /// this node and its children in the tree. This returns true if it makes a -/// change, false otherwise. If a type contradiction is found, throw an -/// exception. +/// change, false otherwise. If a type contradiction is found, flag an error. 
bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { + if (TP.hasError()) + return false; + CodeGenDAGPatterns &CDP = TP.getDAGPatterns(); if (isLeaf()) { if (DefInit *DI = dyn_cast<DefInit>(getLeafValue())) { @@ -1414,7 +1464,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { TP.error("Integer value '" + itostr(II->getValue()) + "' is out of range for type '" + getEnumName(getType(0)) + "'!"); - return MadeChange; + return false; } return false; } @@ -1477,10 +1527,12 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { for (unsigned i = 0, e = NumRetVTs; i != e; ++i) MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP); - if (getNumChildren() != NumParamVTs + 1) + if (getNumChildren() != NumParamVTs + 1) { TP.error("Intrinsic '" + Int->Name + "' expects " + utostr(NumParamVTs) + " operands, not " + utostr(getNumChildren() - 1) + " operands!"); + return false; + } // Apply type info to the intrinsic ID. MadeChange |= getChild(0)->UpdateNodeType(0, MVT::iPTR, TP); @@ -1500,9 +1552,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { // Check that the number of operands is sane. Negative operands -> varargs. if (NI.getNumOperands() >= 0 && - getNumChildren() != (unsigned)NI.getNumOperands()) + getNumChildren() != (unsigned)NI.getNumOperands()) { TP.error(getOperator()->getName() + " node requires exactly " + itostr(NI.getNumOperands()) + " operands!"); + return false; + } bool MadeChange = NI.ApplyTypeConstraints(this, TP); for (unsigned i = 0, e = getNumChildren(); i != e; ++i) @@ -1576,9 +1630,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { continue; // Verify that we didn't run out of provided operands. - if (ChildNo >= getNumChildren()) + if (ChildNo >= getNumChildren()) { TP.error("Instruction '" + getOperator()->getName() + "' expects more operands than were provided."); + return false; + } MVT::SimpleValueType VT; TreePatternNode *Child = getChild(ChildNo++); @@ -1606,9 +1662,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters); } - if (ChildNo != getNumChildren()) + if (ChildNo != getNumChildren()) { TP.error("Instruction '" + getOperator()->getName() + "' was provided too many operands!"); + return false; + } return MadeChange; } @@ -1616,9 +1674,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { assert(getOperator()->isSubClassOf("SDNodeXForm") && "Unknown node type!"); // Node transforms always take one operand. 
- if (getNumChildren() != 1) + if (getNumChildren() != 1) { TP.error("Node transform '" + getOperator()->getName() + "' requires one operand!"); + return false; + } bool MadeChange = getChild(0)->ApplyTypeConstraints(TP, NotRegisters); @@ -1692,27 +1752,30 @@ bool TreePatternNode::canPatternMatch(std::string &Reason, // TreePattern::TreePattern(Record *TheRec, ListInit *RawPat, bool isInput, - CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){ - isInputPattern = isInput; + CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), + isInputPattern(isInput), HasError(false) { for (unsigned i = 0, e = RawPat->getSize(); i != e; ++i) Trees.push_back(ParseTreePattern(RawPat->getElement(i), "")); } TreePattern::TreePattern(Record *TheRec, DagInit *Pat, bool isInput, - CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){ - isInputPattern = isInput; + CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), + isInputPattern(isInput), HasError(false) { Trees.push_back(ParseTreePattern(Pat, "")); } TreePattern::TreePattern(Record *TheRec, TreePatternNode *Pat, bool isInput, - CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp){ - isInputPattern = isInput; + CodeGenDAGPatterns &cdp) : TheRecord(TheRec), CDP(cdp), + isInputPattern(isInput), HasError(false) { Trees.push_back(Pat); } -void TreePattern::error(const std::string &Msg) const { +void TreePattern::error(const std::string &Msg) { + if (HasError) + return; dump(); - throw TGError(TheRecord->getLoc(), "In " + TheRecord->getName() + ": " + Msg); + PrintError(TheRecord->getLoc(), "In " + TheRecord->getName() + ": " + Msg); + HasError = true; } void TreePattern::ComputeNamedNodes() { @@ -1901,7 +1964,7 @@ static bool SimplifyTree(TreePatternNode *&N) { /// InferAllTypes - Infer/propagate as many types throughout the expression /// patterns as possible. Return true if all types are inferred, false -/// otherwise. Throw an exception if a type contradiction is found. +/// otherwise. Flags an error if a type contradiction is found. bool TreePattern:: InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) { if (NamedNodes.empty()) @@ -2152,14 +2215,8 @@ void CodeGenDAGPatterns::ParsePatternFragments() { // Infer as many types as possible. Don't worry about it if we don't infer // all of them, some may depend on the inputs of the pattern. - try { - ThePat->InferAllTypes(); - } catch (...) { - // If this pattern fragment is not supported by this target (no types can - // satisfy its constraints), just ignore it. If the bogus pattern is - // actually used by instructions, the type consistency error will be - // reported there. - } + ThePat->InferAllTypes(); + ThePat->resetError(); // If debugging, print out the pattern fragment result. DEBUG(ThePat->dump()); @@ -2199,8 +2256,8 @@ void CodeGenDAGPatterns::ParseDefaultOperands() { /* Resolve all types */; if (TPN->ContainsUnresolvedType()) { - throw "Value #" + utostr(i) + " of OperandWithDefaultOps '" + - DefaultOps[i]->getName() +"' doesn't have a concrete type!"; + PrintFatalError("Value #" + utostr(i) + " of OperandWithDefaultOps '" + + DefaultOps[i]->getName() +"' doesn't have a concrete type!"); } DefaultOpInfo.DefaultOps.push_back(TPN); } @@ -2746,7 +2803,7 @@ void CodeGenDAGPatterns::ParseInstructions() { Instructions.begin(), E = Instructions.end(); II != E; ++II) { DAGInstruction &TheInst = II->second; - const TreePattern *I = TheInst.getPattern(); + TreePattern *I = TheInst.getPattern(); if (I == 0) continue; // No pattern. 
// FIXME: Assume only the first tree is the pattern. The others are clobber @@ -2777,7 +2834,7 @@ typedef std::pair<const TreePatternNode*, unsigned> NameRecord; static void FindNames(const TreePatternNode *P, std::map<std::string, NameRecord> &Names, - const TreePattern *PatternTop) { + TreePattern *PatternTop) { if (!P->getName().empty()) { NameRecord &Rec = Names[P->getName()]; // If this is the first instance of the name, remember the node. @@ -2794,7 +2851,7 @@ static void FindNames(const TreePatternNode *P, } } -void CodeGenDAGPatterns::AddPatternToMatch(const TreePattern *Pattern, +void CodeGenDAGPatterns::AddPatternToMatch(TreePattern *Pattern, const PatternToMatch &PTM) { // Do some sanity checking on the pattern we're about to match. std::string Reason; @@ -2895,7 +2952,7 @@ void CodeGenDAGPatterns::InferInstructionFlags() { } if (Errors) - throw "pattern conflicts"; + PrintFatalError("pattern conflicts"); // Revisit instructions with undefined flags and no pattern. if (Target.guessInstructionProperties()) { @@ -2992,7 +3049,7 @@ void CodeGenDAGPatterns::VerifyInstructionFlags() { } } if (Errors) - throw "Errors in DAG patterns"; + PrintFatalError("Errors in DAG patterns"); } /// Given a pattern result with an unresolved type, see if we can find one diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 66f77eae1a..9be763f2ff 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -105,7 +105,7 @@ namespace EEVT { /// MergeInTypeInfo - This merges in type information from the specified /// argument. If 'this' changes, it returns true. If the two types are - /// contradictory (e.g. merge f32 into i32) then this throws an exception. + /// contradictory (e.g. merge f32 into i32) then this flags an error. bool MergeInTypeInfo(const EEVT::TypeSet &InVT, TreePattern &TP); bool MergeInTypeInfo(MVT::SimpleValueType InVT, TreePattern &TP) { @@ -187,8 +187,8 @@ struct SDTypeConstraint { /// ApplyTypeConstraint - Given a node in a pattern, apply this type /// constraint to the nodes operands. This returns true if it makes a - /// change, false otherwise. If a type contradiction is found, throw an - /// exception. + /// change, false otherwise. If a type contradiction is found, an error + /// is flagged. bool ApplyTypeConstraint(TreePatternNode *N, const SDNodeInfo &NodeInfo, TreePattern &TP) const; }; @@ -232,7 +232,7 @@ public: /// ApplyTypeConstraints - Given a node in a pattern, apply the type /// constraints for this node to the operands of the node. This returns /// true if it makes a change, false otherwise. If a type contradiction is - /// found, throw an exception. + /// found, an error is flagged. bool ApplyTypeConstraints(TreePatternNode *N, TreePattern &TP) const { bool MadeChange = false; for (unsigned i = 0, e = TypeConstraints.size(); i != e; ++i) @@ -446,13 +446,12 @@ public: // Higher level manipulation routines. /// ApplyTypeConstraints - Apply all of the type constraints relevant to /// this node and its children in the tree. This returns true if it makes a - /// change, false otherwise. If a type contradiction is found, throw an - /// exception. + /// change, false otherwise. If a type contradiction is found, flag an error. bool ApplyTypeConstraints(TreePattern &TP, bool NotRegisters); /// UpdateNodeType - Set the node type of N to VT if VT contains - /// information. If N already contains a conflicting type, then throw an - /// exception. This returns true if any information was updated. 
+ /// information. If N already contains a conflicting type, then flag an + /// error. This returns true if any information was updated. /// bool UpdateNodeType(unsigned ResNo, const EEVT::TypeSet &InTy, TreePattern &TP) { @@ -514,6 +513,10 @@ class TreePattern { /// isInputPattern - True if this is an input pattern, something to match. /// False if this is an output pattern, something to emit. bool isInputPattern; + + /// hasError - True if the currently processed nodes have unresolvable types + /// or other non-fatal errors + bool HasError; public: /// TreePattern constructor - Parse the specified DagInits into the @@ -565,13 +568,19 @@ public: /// InferAllTypes - Infer/propagate as many types throughout the expression /// patterns as possible. Return true if all types are inferred, false - /// otherwise. Throw an exception if a type contradiction is found. + /// otherwise. Bail out if a type contradiction is found. bool InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *NamedTypes=0); - /// error - Throw an exception, prefixing it with information about this - /// pattern. - void error(const std::string &Msg) const; + /// error - If this is the first error in the current resolution step, + /// print it and set the error flag. Otherwise, continue silently. + void error(const std::string &Msg); + bool hasError() const { + return HasError; + } + void resetError() { + HasError = false; + } void print(raw_ostream &OS) const; void dump() const; @@ -602,7 +611,7 @@ public: : Pattern(TP), Results(results), Operands(operands), ImpResults(impresults), ResultPattern(0) {} - const TreePattern *getPattern() const { return Pattern; } + TreePattern *getPattern() const { return Pattern; } unsigned getNumResults() const { return Results.size(); } unsigned getNumOperands() const { return Operands.size(); } unsigned getNumImpResults() const { return ImpResults.size(); } @@ -794,7 +803,7 @@ private: void GenerateVariants(); void VerifyInstructionFlags(); - void AddPatternToMatch(const TreePattern *Pattern, const PatternToMatch &PTM); + void AddPatternToMatch(TreePattern *Pattern, const PatternToMatch &PTM); void FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, std::map<std::string, TreePatternNode*> &InstInputs, diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index fd38672bfc..0a8684d3da 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -34,18 +34,18 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { if (DefInit *Init = dyn_cast<DefInit>(OutDI->getOperator())) { if (Init->getDef()->getName() != "outs") - throw R->getName() + ": invalid def name for output list: use 'outs'"; + PrintFatalError(R->getName() + ": invalid def name for output list: use 'outs'"); } else - throw R->getName() + ": invalid output list: use 'outs'"; + PrintFatalError(R->getName() + ": invalid output list: use 'outs'"); NumDefs = OutDI->getNumArgs(); DagInit *InDI = R->getValueAsDag("InOperandList"); if (DefInit *Init = dyn_cast<DefInit>(InDI->getOperator())) { if (Init->getDef()->getName() != "ins") - throw R->getName() + ": invalid def name for input list: use 'ins'"; + PrintFatalError(R->getName() + ": invalid def name for input list: use 'ins'"); } else - throw R->getName() + ": invalid input list: use 'ins'"; + PrintFatalError(R->getName() + ": invalid input list: use 'ins'"); unsigned MIOperandNo = 0; std::set<std::string> OperandNames; @@ -62,7 +62,7 @@ CGIOperandList::CGIOperandList(Record *R) : 
TheDef(R) { DefInit *Arg = dyn_cast<DefInit>(ArgInit); if (!Arg) - throw "Illegal operand for the '" + R->getName() + "' instruction!"; + PrintFatalError("Illegal operand for the '" + R->getName() + "' instruction!"); Record *Rec = Arg->getDef(); std::string PrintMethod = "printOperand"; @@ -82,8 +82,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { // Verify that MIOpInfo has an 'ops' root value. if (!isa<DefInit>(MIOpInfo->getOperator()) || cast<DefInit>(MIOpInfo->getOperator())->getDef()->getName() != "ops") - throw "Bad value for MIOperandInfo in operand '" + Rec->getName() + - "'\n"; + PrintFatalError("Bad value for MIOperandInfo in operand '" + Rec->getName() + + "'\n"); // If we have MIOpInfo, then we have #operands equal to number of entries // in MIOperandInfo. @@ -101,16 +101,16 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { OperandType = "OPERAND_REGISTER"; } else if (!Rec->isSubClassOf("PointerLikeRegClass") && !Rec->isSubClassOf("unknown_class")) - throw "Unknown operand class '" + Rec->getName() + - "' in '" + R->getName() + "' instruction!"; + PrintFatalError("Unknown operand class '" + Rec->getName() + + "' in '" + R->getName() + "' instruction!"); // Check that the operand has a name and that it's unique. if (ArgName.empty()) - throw "In instruction '" + R->getName() + "', operand #" + utostr(i) + - " has no name!"; + PrintFatalError("In instruction '" + R->getName() + "', operand #" + utostr(i) + + " has no name!"); if (!OperandNames.insert(ArgName).second) - throw "In instruction '" + R->getName() + "', operand #" + utostr(i) + - " has the same name as a previous operand!"; + PrintFatalError("In instruction '" + R->getName() + "', operand #" + utostr(i) + + " has the same name as a previous operand!"); OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, EncoderMethod, OperandType, MIOperandNo, NumOps, @@ -128,13 +128,13 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) { /// getOperandNamed - Return the index of the operand with the specified /// non-empty name. If the instruction does not have an operand with the -/// specified name, throw an exception. +/// specified name, abort. /// unsigned CGIOperandList::getOperandNamed(StringRef Name) const { unsigned OpIdx; if (hasOperandNamed(Name, OpIdx)) return OpIdx; - throw "'" + TheDef->getName() + "' does not have an operand named '$" + - Name.str() + "'!"; + PrintFatalError("'" + TheDef->getName() + "' does not have an operand named '$" + + Name.str() + "'!"); } /// hasOperandNamed - Query whether the instruction has an operand of the @@ -153,7 +153,7 @@ bool CGIOperandList::hasOperandNamed(StringRef Name, unsigned &OpIdx) const { std::pair<unsigned,unsigned> CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) { if (Op.empty() || Op[0] != '$') - throw TheDef->getName() + ": Illegal operand name: '" + Op + "'"; + PrintFatalError(TheDef->getName() + ": Illegal operand name: '" + Op + "'"); std::string OpName = Op.substr(1); std::string SubOpName; @@ -163,7 +163,7 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) { if (DotIdx != std::string::npos) { SubOpName = OpName.substr(DotIdx+1); if (SubOpName.empty()) - throw TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'"; + PrintFatalError(TheDef->getName() + ": illegal empty suboperand name in '" +Op +"'"); OpName = OpName.substr(0, DotIdx); } @@ -173,8 +173,8 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) { // If one was needed, throw. 
if (OperandList[OpIdx].MINumOperands > 1 && !AllowWholeOp && SubOpName.empty()) - throw TheDef->getName() + ": Illegal to refer to" - " whole operand part of complex operand '" + Op + "'"; + PrintFatalError(TheDef->getName() + ": Illegal to refer to" + " whole operand part of complex operand '" + Op + "'"); // Otherwise, return the operand. return std::make_pair(OpIdx, 0U); @@ -183,7 +183,7 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) { // Find the suboperand number involved. DagInit *MIOpInfo = OperandList[OpIdx].MIOperandInfo; if (MIOpInfo == 0) - throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'"; + PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'"); // Find the operand with the right name. for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i) @@ -191,7 +191,7 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) { return std::make_pair(OpIdx, i); // Otherwise, didn't find it! - throw TheDef->getName() + ": unknown suboperand name in '" + Op + "'"; + PrintFatalError(TheDef->getName() + ": unknown suboperand name in '" + Op + "'"); } static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) { @@ -203,13 +203,13 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) { std::string Name = CStr.substr(wpos+1); wpos = Name.find_first_not_of(" \t"); if (wpos == std::string::npos) - throw "Illegal format for @earlyclobber constraint: '" + CStr + "'"; + PrintFatalError("Illegal format for @earlyclobber constraint: '" + CStr + "'"); Name = Name.substr(wpos); std::pair<unsigned,unsigned> Op = Ops.ParseOperandName(Name, false); // Build the string for the operand if (!Ops[Op.first].Constraints[Op.second].isNone()) - throw "Operand '" + Name + "' cannot have multiple constraints!"; + PrintFatalError("Operand '" + Name + "' cannot have multiple constraints!"); Ops[Op.first].Constraints[Op.second] = CGIOperandList::ConstraintInfo::getEarlyClobber(); return; @@ -224,14 +224,14 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) { // TIED_TO: $src1 = $dst wpos = Name.find_first_of(" \t"); if (wpos == std::string::npos) - throw "Illegal format for tied-to constraint: '" + CStr + "'"; + PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'"); std::string DestOpName = Name.substr(0, wpos); std::pair<unsigned,unsigned> DestOp = Ops.ParseOperandName(DestOpName, false); Name = CStr.substr(pos+1); wpos = Name.find_first_not_of(" \t"); if (wpos == std::string::npos) - throw "Illegal format for tied-to constraint: '" + CStr + "'"; + PrintFatalError("Illegal format for tied-to constraint: '" + CStr + "'"); std::string SrcOpName = Name.substr(wpos); std::pair<unsigned,unsigned> SrcOp = Ops.ParseOperandName(SrcOpName, false); @@ -243,7 +243,8 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) { unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp); if (!Ops[DestOp.first].Constraints[DestOp.second].isNone()) - throw "Operand '" + DestOpName + "' cannot have multiple constraints!"; + PrintFatalError("Operand '" + DestOpName + + "' cannot have multiple constraints!"); Ops[DestOp.first].Constraints[DestOp.second] = CGIOperandList::ConstraintInfo::getTied(FlatOpNo); } @@ -328,7 +329,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) ImplicitUses = R->getValueAsListOfDefs("Uses"); if (neverHasSideEffects + hasSideEffects > 1) - throw R->getName() + ": multiple conflicting side-effect flags set!"; + 
PrintFatalError(R->getName() + ": multiple conflicting side-effect flags set!"); // Parse Constraints. ParseConstraints(R->getValueAsString("Constraints"), Operands); @@ -422,7 +423,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, // If the operand is a record, it must have a name, and the record type // must match up with the instruction's argument type. if (Result->getArgName(AliasOpNo).empty()) - throw TGError(Loc, "result argument #" + utostr(AliasOpNo) + + PrintFatalError(Loc, "result argument #" + utostr(AliasOpNo) + " must have a name!"); ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef()); return true; @@ -457,13 +458,13 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, if (!T.getRegisterClass(InstOpRec) .contains(T.getRegBank().getReg(ADI->getDef()))) - throw TGError(Loc, "fixed register " + ADI->getDef()->getName() + - " is not a member of the " + InstOpRec->getName() + - " register class!"); + PrintFatalError(Loc, "fixed register " + ADI->getDef()->getName() + + " is not a member of the " + InstOpRec->getName() + + " register class!"); if (!Result->getArgName(AliasOpNo).empty()) - throw TGError(Loc, "result fixed register argument must " - "not have a name!"); + PrintFatalError(Loc, "result fixed register argument must " + "not have a name!"); ResOp = ResultOperand(ADI->getDef()); return true; @@ -491,8 +492,8 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, return false; // Integer arguments can't have names. if (!Result->getArgName(AliasOpNo).empty()) - throw TGError(Loc, "result argument #" + utostr(AliasOpNo) + - " must not have a name!"); + PrintFatalError(Loc, "result argument #" + utostr(AliasOpNo) + + " must not have a name!"); ResOp = ResultOperand(II->getValue()); return true; } @@ -520,7 +521,8 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { // Verify that the root of the result is an instruction. DefInit *DI = dyn_cast<DefInit>(Result->getOperator()); if (DI == 0 || !DI->getDef()->isSubClassOf("Instruction")) - throw TGError(R->getLoc(), "result of inst alias should be an instruction"); + PrintFatalError(R->getLoc(), + "result of inst alias should be an instruction"); ResultInst = &T.getInstruction(DI->getDef()); @@ -536,9 +538,9 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { // same type. 
Record *&Entry = NameClass[Result->getArgName(i)]; if (Entry && Entry != ADI->getDef()) - throw TGError(R->getLoc(), "result value $" + Result->getArgName(i) + - " is both " + Entry->getName() + " and " + - ADI->getDef()->getName() + "!"); + PrintFatalError(R->getLoc(), "result value $" + Result->getArgName(i) + + " is both " + Entry->getName() + " and " + + ADI->getDef()->getName() + "!"); Entry = ADI->getDef(); } @@ -554,7 +556,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { continue; if (AliasOpNo >= Result->getNumArgs()) - throw TGError(R->getLoc(), "not enough arguments for instruction!"); + PrintFatalError(R->getLoc(), "not enough arguments for instruction!"); Record *InstOpRec = ResultInst->Operands[i].Rec; unsigned NumSubOps = ResultInst->Operands[i].MINumOperands; @@ -595,7 +597,7 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo; for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) { if (AliasOpNo >= Result->getNumArgs()) - throw TGError(R->getLoc(), "not enough arguments for instruction!"); + PrintFatalError(R->getLoc(), "not enough arguments for instruction!"); Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef(); if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false, R->getLoc(), T, ResOp)) { @@ -603,18 +605,18 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) { ResultInstOperandIndex.push_back(std::make_pair(i, SubOp)); ++AliasOpNo; } else { - throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) + + PrintFatalError(R->getLoc(), "result argument #" + utostr(AliasOpNo) + " does not match instruction operand class " + (SubOp == 0 ? InstOpRec->getName() :SubRec->getName())); } } continue; } - throw TGError(R->getLoc(), "result argument #" + utostr(AliasOpNo) + - " does not match instruction operand class " + - InstOpRec->getName()); + PrintFatalError(R->getLoc(), "result argument #" + utostr(AliasOpNo) + + " does not match instruction operand class " + + InstOpRec->getName()); } if (AliasOpNo != Result->getNumArgs()) - throw TGError(R->getLoc(), "too many operands for instruction!"); + PrintFatalError(R->getLoc(), "too many operands for instruction!"); } diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index f601a8318f..55d44399df 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -152,7 +152,7 @@ namespace llvm { /// getOperandNamed - Return the index of the operand with the specified /// non-empty name. If the instruction does not have an operand with the - /// specified name, throw an exception. + /// specified name, abort. unsigned getOperandNamed(StringRef Name) const; /// hasOperandNamed - Query whether the instruction has an operand of the @@ -162,9 +162,8 @@ namespace llvm { /// ParseOperandName - Parse an operand name like "$foo" or "$foo.bar", /// where $foo is a whole operand and $foo.bar refers to a suboperand. - /// This throws an exception if the name is invalid. If AllowWholeOp is - /// true, references to operands with suboperands are allowed, otherwise - /// not. + /// This aborts if the name is invalid. If AllowWholeOp is true, references + /// to operands with suboperands are allowed, otherwise not. 
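A sketch of the "$foo"/"$foo.bar" grammar documented in the comment just above (the ParseOperandName declaration itself continues below). The operand name "$addr.base" and the helper are illustrative only; the real routine also resolves both parts to (operand, suboperand) indices through the operand list and MIOperandInfo:

#include <cassert>
#include <cstdio>
#include <string>
#include <utility>

// Split "$foo.bar" into ("foo", "bar"); the suboperand part is empty for
// a plain "$foo". Index resolution against the operand list is elided.
static std::pair<std::string, std::string>
splitOperandName(const std::string &Op) {
  assert(!Op.empty() && Op[0] == '$' && "Illegal operand name");
  std::string Name = Op.substr(1);
  std::string::size_type Dot = Name.find('.');
  if (Dot == std::string::npos)
    return std::make_pair(Name, std::string());
  assert(Dot + 1 != Name.size() && "illegal empty suboperand name");
  return std::make_pair(Name.substr(0, Dot), Name.substr(Dot + 1));
}

int main() {
  std::pair<std::string, std::string> P = splitOperandName("$addr.base");
  std::printf("operand=%s suboperand=%s\n", P.first.c_str(), P.second.c_str());
  return 0;
}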
std::pair<unsigned,unsigned> ParseOperandName(const std::string &Op, bool AllowWholeOp = true); diff --git a/utils/TableGen/CodeGenMapTable.cpp b/utils/TableGen/CodeGenMapTable.cpp index 4bfd1ba798..1653d67da9 100644 --- a/utils/TableGen/CodeGenMapTable.cpp +++ b/utils/TableGen/CodeGenMapTable.cpp @@ -78,6 +78,7 @@ #include "CodeGenTarget.h" #include "llvm/Support/Format.h" +#include "llvm/TableGen/Error.h" using namespace llvm; typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy; @@ -128,20 +129,19 @@ public: // Each instruction map must specify at least one column for it to be valid. if (ColValList->getSize() == 0) - throw "InstrMapping record `" + MapRec->getName() + "' has empty " + - "`ValueCols' field!"; + PrintFatalError(MapRec->getLoc(), "InstrMapping record `" + + MapRec->getName() + "' has empty " + "`ValueCols' field!"); for (unsigned i = 0, e = ColValList->getSize(); i < e; i++) { ListInit *ColI = dyn_cast<ListInit>(ColValList->getElement(i)); // Make sure that all the sub-lists in 'ValueCols' have same number of // elements as the fields in 'ColFields'. - if (ColI->getSize() == ColFields->getSize()) - ValueCols.push_back(ColI); - else { - throw "Record `" + MapRec->getName() + "', field `" + "ValueCols" + - "' entries don't match with the entries in 'ColFields'!"; - } + if (ColI->getSize() != ColFields->getSize()) + PrintFatalError(MapRec->getLoc(), "Record `" + MapRec->getName() + + "', field `ValueCols' entries don't match with " + + " the entries in 'ColFields'!"); + ValueCols.push_back(ColI); } } @@ -344,10 +344,9 @@ Record *MapTableEmitter::getInstrForColumn(Record *KeyInstr, if (MatchFound) { if (MatchInstr) // Already had a match // Error if multiple matches are found for a column. - throw "Multiple matches found for `" + KeyInstr->getName() + - "', for the relation `" + InstrMapDesc.getName(); - else - MatchInstr = CurInstr; + PrintFatalError("Multiple matches found for `" + KeyInstr->getName() + + "', for the relation `" + InstrMapDesc.getName()); + MatchInstr = CurInstr; } } return MatchInstr; @@ -516,10 +515,9 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) { for (unsigned j = 0; j < ListSize; j++) { ListInit *ListJ = dyn_cast<ListInit>(List->getElement(j)); - if (ListJ->getSize() != ColFields->getSize()) { - throw "Record `" + CurMap->getName() + "', field `" + "ValueCols" + - "' entries don't match with the entries in 'ColFields' !"; - } + if (ListJ->getSize() != ColFields->getSize()) + PrintFatalError("Record `" + CurMap->getName() + "', field " + "`ValueCols' entries don't match with the entries in 'ColFields' !"); ValueCols.push_back(ListJ); } diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 10064fdd16..580e319f24 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -54,19 +54,20 @@ void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); if (!Comps.empty()) { if (Comps.size() != 2) - throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries"); + PrintFatalError(TheDef->getLoc(), + "ComposedOf must have exactly two entries"); CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); CodeGenSubRegIndex *X = A->addComposite(B, this); if (X) - throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); + PrintFatalError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); } std::vector<Record*> 
Parts = TheDef->getValueAsListOfDefs("CoveringSubRegIndices"); if (!Parts.empty()) { if (Parts.size() < 2) - throw TGError(TheDef->getLoc(), + PrintFatalError(TheDef->getLoc(), "CoveredBySubRegs must have two or more entries"); SmallVector<CodeGenSubRegIndex*, 8> IdxParts; for (unsigned i = 0, e = Parts.size(); i != e; ++i) @@ -112,8 +113,8 @@ void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) { std::vector<Record*> SRs = TheDef->getValueAsListOfDefs("SubRegs"); if (SRIs.size() != SRs.size()) - throw TGError(TheDef->getLoc(), - "SubRegs and SubRegIndices must have the same size"); + PrintFatalError(TheDef->getLoc(), + "SubRegs and SubRegIndices must have the same size"); for (unsigned i = 0, e = SRIs.size(); i != e; ++i) { ExplicitSubRegIndices.push_back(RegBank.getSubRegIdx(SRIs[i])); @@ -224,8 +225,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { CodeGenRegister *SR = ExplicitSubRegs[i]; CodeGenSubRegIndex *Idx = ExplicitSubRegIndices[i]; if (!SubRegs.insert(std::make_pair(Idx, SR)).second) - throw TGError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() + - " appears twice in Register " + getName()); + PrintFatalError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() + + " appears twice in Register " + getName()); // Map explicit sub-registers first, so the names take precedence. // The inherited sub-registers are mapped below. SubReg2Idx.insert(std::make_pair(SR, Idx)); @@ -308,8 +309,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); - throw TGError(Loc, "Register " + getName() + - " has itself as a sub-register"); + PrintFatalError(Loc, "Register " + getName() + + " has itself as a sub-register"); } // Ensure that every sub-register has a unique name. DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins = @@ -320,7 +321,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { ArrayRef<SMLoc> Loc; if (TheDef) Loc = TheDef->getLoc(); - throw TGError(Loc, "Sub-register can't have two names: " + + PrintFatalError(Loc, "Sub-register can't have two names: " + SI->second->getName() + " available as " + SI->first->getName() + " and " + Ins->second->getName()); } @@ -467,8 +468,8 @@ void CodeGenRegister::computeSecondarySubRegs(CodeGenRegBank &RegBank) { SE = NewSubReg->SubRegs.end(); SI != SE; ++SI) { CodeGenSubRegIndex *SubIdx = getSubRegIndex(SI->second); if (!SubIdx) - throw TGError(TheDef->getLoc(), "No SubRegIndex for " + - SI->second->getName() + " in " + getName()); + PrintFatalError(TheDef->getLoc(), "No SubRegIndex for " + + SI->second->getName() + " in " + getName()); NewIdx->addComposite(SI->first, SubIdx); } } @@ -592,9 +593,10 @@ struct TupleExpander : SetTheory::Expander { unsigned Dim = Indices.size(); ListInit *SubRegs = Def->getValueAsListInit("SubRegs"); if (Dim != SubRegs->getSize()) - throw TGError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch"); + PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch"); if (Dim < 2) - throw TGError(Def->getLoc(), "Tuples must have at least 2 sub-registers"); + PrintFatalError(Def->getLoc(), + "Tuples must have at least 2 sub-registers"); // Evaluate the sub-register lists to be zipped. 
unsigned Length = ~0u; @@ -706,8 +708,8 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { Record *Type = TypeList[i]; if (!Type->isSubClassOf("ValueType")) - throw "RegTypes list member '" + Type->getName() + - "' does not derive from the ValueType class!"; + PrintFatalError("RegTypes list member '" + Type->getName() + + "' does not derive from the ValueType class!"); VTs.push_back(getValueType(Type)); } assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!"); @@ -735,7 +737,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) CodeGenRegister *Reg = RegBank.getReg(Order.back()); Order.pop_back(); if (!contains(Reg)) - throw TGError(R->getLoc(), " AltOrder register " + Reg->getName() + + PrintFatalError(R->getLoc(), " AltOrder register " + Reg->getName() + " is not a class member"); } } @@ -1021,7 +1023,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { // Read in register class definitions. std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass"); if (RCs.empty()) - throw std::string("No 'RegisterClass' subclasses defined!"); + PrintFatalError(std::string("No 'RegisterClass' subclasses defined!")); // Allocate user-defined register classes. RegClasses.reserve(RCs.size()); @@ -1098,7 +1100,7 @@ CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) { if (CodeGenRegisterClass *RC = Def2RC[Def]) return RC; - throw TGError(Def->getLoc(), "Not a known RegisterClass!"); + PrintFatalError(Def->getLoc(), "Not a known RegisterClass!"); } CodeGenSubRegIndex* diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index 1cca3e3f85..63cc97a8c1 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -63,7 +63,7 @@ struct InstRegexOp : public SetTheory::Operator { AI = Expr->arg_begin(), AE = Expr->arg_end(); AI != AE; ++AI) { StringInit *SI = dyn_cast<StringInit>(*AI); if (!SI) - throw TGError(Loc, "instregex requires pattern string: " + PrintFatalError(Loc, "instregex requires pattern string: " + Expr->getAsString()); std::string pat = SI->getValue(); // Implement a python-style prefix match. 
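The "python-style prefix match" named in the comment above is re.match semantics: the pattern must match starting at the beginning of the instruction name, but need not consume all of it. A small illustration, with std::regex standing in for LLVM's own regex support:

#include <cstdio>
#include <regex>
#include <string>

// Prefix match (re.match): succeed only if the pattern matches a prefix
// of the name, not an arbitrary substring (which would be re.search).
static bool prefixMatch(const std::string &Pat, const std::string &Name) {
  return std::regex_search(Name, std::regex(Pat),
                           std::regex_constants::match_continuous);
}

int main() {
  std::printf("%d\n", prefixMatch("ADD.*rr", "ADD32rr"));  // 1: anchored hit
  std::printf("%d\n", prefixMatch("ADD", "XADD8"));        // 0: not at start
  return 0;
}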
@@ -268,13 +268,13 @@ void CodeGenSchedModels::collectSchedRW() { Record *AliasDef = (*AI)->getValueAsDef("AliasRW"); if (MatchDef->isSubClassOf("SchedWrite")) { if (!AliasDef->isSubClassOf("SchedWrite")) - throw TGError((*AI)->getLoc(), "SchedWrite Alias must be SchedWrite"); + PrintFatalError((*AI)->getLoc(), "SchedWrite Alias must be SchedWrite"); scanSchedRW(AliasDef, SWDefs, RWSet); } else { assert(MatchDef->isSubClassOf("SchedRead") && "Unknown SchedReadWrite"); if (!AliasDef->isSubClassOf("SchedRead")) - throw TGError((*AI)->getLoc(), "SchedRead Alias must be SchedRead"); + PrintFatalError((*AI)->getLoc(), "SchedRead Alias must be SchedRead"); scanSchedRW(AliasDef, SRDefs, RWSet); } } @@ -305,7 +305,7 @@ void CodeGenSchedModels::collectSchedRW() { Record *MatchDef = (*AI)->getValueAsDef("MatchRW"); CodeGenSchedRW &RW = getSchedRW(MatchDef); if (RW.IsAlias) - throw TGError((*AI)->getLoc(), "Cannot Alias an Alias"); + PrintFatalError((*AI)->getLoc(), "Cannot Alias an Alias"); RW.Aliases.push_back(*AI); } DEBUG( @@ -437,9 +437,9 @@ void CodeGenSchedModels::expandRWSeqForProc( continue; } if (AliasDef) - throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases " - "defined for processor " + ProcModel.ModelName + - " Ensure only one SchedAlias exists per RW."); + PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases " + "defined for processor " + ProcModel.ModelName + + " Ensure only one SchedAlias exists per RW."); AliasDef = AliasRW.TheDef; } if (AliasDef) { @@ -706,7 +706,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { // Sort Instrs into sets. const RecVec *InstDefs = Sets.expand(InstRWDef); if (InstDefs->empty()) - throw TGError(InstRWDef->getLoc(), "No matching instruction opcodes"); + PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes"); for (RecIter I = InstDefs->begin(), E = InstDefs->end(); I != E; ++I) { unsigned SCIdx = 0; @@ -766,7 +766,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { for (RecIter RI = SchedClasses[OldSCIdx].InstRWs.begin(), RE = SchedClasses[OldSCIdx].InstRWs.end(); RI != RE; ++RI) { if ((*RI)->getValueAsDef("SchedModel") == RWModelDef) { - throw TGError(InstRWDef->getLoc(), "Overlapping InstRW def " + + PrintFatalError(InstRWDef->getLoc(), "Overlapping InstRW def " + (*II)->getName() + " also matches " + (*RI)->getValue("Instrs")->getValue()->getAsString()); } @@ -825,11 +825,11 @@ void CodeGenSchedModels::collectProcItinRW() { std::sort(ItinRWDefs.begin(), ItinRWDefs.end(), LessRecord()); for (RecIter II = ItinRWDefs.begin(), IE = ItinRWDefs.end(); II != IE; ++II) { if (!(*II)->getValueInit("SchedModel")->isComplete()) - throw TGError((*II)->getLoc(), "SchedModel is undefined"); + PrintFatalError((*II)->getLoc(), "SchedModel is undefined"); Record *ModelDef = (*II)->getValueAsDef("SchedModel"); ProcModelMapTy::const_iterator I = ProcModelMap.find(ModelDef); if (I == ProcModelMap.end()) { - throw TGError((*II)->getLoc(), "Undefined SchedMachineModel " + PrintFatalError((*II)->getLoc(), "Undefined SchedMachineModel " + ModelDef->getName()); } ProcModels[I->second].ItinRWDefs.push_back(*II); @@ -867,7 +867,7 @@ void CodeGenSchedModels::inferFromItinClass(Record *ItinClassDef, if (!std::count(Matched.begin(), Matched.end(), ItinClassDef)) continue; if (HasMatch) - throw TGError((*II)->getLoc(), "Duplicate itinerary class " + PrintFatalError((*II)->getLoc(), "Duplicate itinerary class " + ItinClassDef->getName() + " in ItinResources for " + PM.ModelName); HasMatch = true; @@ -1095,9 +1095,10 @@ 
void PredTransitions::getIntersectingVariants( if (Cnt > 1) { const CodeGenProcModel &PM = *(SchedModels.procModelBegin() + Variant.ProcIdx); - throw TGError(Variant.VarOrSeqDef->getLoc(), - "Multiple variants defined for processor " + PM.ModelName + - " Ensure only one SchedAlias exists per RW."); + PrintFatalError(Variant.VarOrSeqDef->getLoc(), + "Multiple variants defined for processor " + + PM.ModelName + + " Ensure only one SchedAlias exists per RW."); } } if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) { @@ -1215,8 +1216,9 @@ void PredTransitions::substituteVariantOperand( std::vector<TransVariant> IntersectingVariants; getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants); if (IntersectingVariants.empty()) - throw TGError(SchedRW.TheDef->getLoc(), "No variant of this type has a " - "matching predicate on any processor "); + PrintFatalError(SchedRW.TheDef->getLoc(), + "No variant of this type has " + "a matching predicate on any processor"); // Now expand each variant on top of its copy of the transition. for (std::vector<TransVariant>::const_iterator IVI = IntersectingVariants.begin(), @@ -1440,9 +1442,9 @@ void CodeGenSchedModels::collectItinProcResources(Record *ItinClassDef) { if (!std::count(Matched.begin(), Matched.end(), ItinClassDef)) continue; if (HasMatch) - throw TGError((*II)->getLoc(), "Duplicate itinerary class " - + ItinClassDef->getName() - + " in ItinResources for " + PM.ModelName); + PrintFatalError((*II)->getLoc(), "Duplicate itinerary class " + + ItinClassDef->getName() + + " in ItinResources for " + PM.ModelName); HasMatch = true; IdxVec Writes, Reads; findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads); @@ -1519,17 +1521,17 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind, if ((*RI)->getValueAsDef("Kind") == ProcResKind && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) { if (ProcUnitDef) { - throw TGError((*RI)->getLoc(), - "Multiple ProcessorResourceUnits associated with " - + ProcResKind->getName()); + PrintFatalError((*RI)->getLoc(), + "Multiple ProcessorResourceUnits associated with " + + ProcResKind->getName()); } ProcUnitDef = *RI; } } if (!ProcUnitDef) { - throw TGError(ProcResKind->getLoc(), - "No ProcessorResources associated with " - + ProcResKind->getName()); + PrintFatalError(ProcResKind->getLoc(), + "No ProcessorResources associated with " + + ProcResKind->getName()); } return ProcUnitDef; } @@ -1586,10 +1588,10 @@ unsigned CodeGenProcModel::getProcResourceIdx(Record *PRDef) const { RecIter PRPos = std::find(ProcResourceDefs.begin(), ProcResourceDefs.end(), PRDef); if (PRPos == ProcResourceDefs.end()) - throw TGError(PRDef->getLoc(), "ProcResource def is not included in " - "the ProcResources list for " + ModelName); + PrintFatalError(PRDef->getLoc(), "ProcResource def is not included in " + "the ProcResources list for " + ModelName); // Idx=0 is reserved for invalid. - return 1 + PRPos - ProcResourceDefs.begin(); + return 1 + (PRPos - ProcResourceDefs.begin()); } #ifndef NDEBUG diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index 42c7a59702..bd55e697c5 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -10,13 +10,14 @@ // This class wraps target description classes used by the various code // generation TableGen backends. This makes it easier to access the data and // provides a single place that needs to check it for validity. All of these -// classes throw exceptions on error conditions. 
+// classes abort on error conditions. // //===----------------------------------------------------------------------===// #include "CodeGenTarget.h" #include "CodeGenIntrinsics.h" #include "CodeGenSchedule.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" @@ -124,9 +125,9 @@ CodeGenTarget::CodeGenTarget(RecordKeeper &records) : Records(records), RegBank(0), SchedModels(0) { std::vector<Record*> Targets = Records.getAllDerivedDefinitions("Target"); if (Targets.size() == 0) - throw std::string("ERROR: No 'Target' subclasses defined!"); + PrintFatalError("ERROR: No 'Target' subclasses defined!"); if (Targets.size() != 1) - throw std::string("ERROR: Multiple subclasses of Target defined!"); + PrintFatalError("ERROR: Multiple subclasses of Target defined!"); TargetRec = Targets[0]; } @@ -160,7 +161,7 @@ Record *CodeGenTarget::getInstructionSet() const { Record *CodeGenTarget::getAsmParser() const { std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyParsers"); if (AsmParserNum >= LI.size()) - throw "Target does not have an AsmParser #" + utostr(AsmParserNum) + "!"; + PrintFatalError("Target does not have an AsmParser #" + utostr(AsmParserNum) + "!"); return LI[AsmParserNum]; } @@ -171,7 +172,7 @@ Record *CodeGenTarget::getAsmParserVariant(unsigned i) const { std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyParserVariants"); if (i >= LI.size()) - throw "Target does not have an AsmParserVariant #" + utostr(i) + "!"; + PrintFatalError("Target does not have an AsmParserVariant #" + utostr(i) + "!"); return LI[i]; } @@ -189,7 +190,7 @@ unsigned CodeGenTarget::getAsmParserVariantCount() const { Record *CodeGenTarget::getAsmWriter() const { std::vector<Record*> LI = TargetRec->getValueAsListOfDefs("AssemblyWriters"); if (AsmWriterNum >= LI.size()) - throw "Target does not have an AsmWriter #" + utostr(AsmWriterNum) + "!"; + PrintFatalError("Target does not have an AsmWriter #" + utostr(AsmWriterNum) + "!"); return LI[AsmWriterNum]; } @@ -256,7 +257,7 @@ CodeGenSchedModels &CodeGenTarget::getSchedModels() const { void CodeGenTarget::ReadInstructions() const { std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction"); if (Insts.size() <= 2) - throw std::string("No 'Instruction' subclasses defined!"); + PrintFatalError("No 'Instruction' subclasses defined!"); // Parse the instructions defined in the .td file. for (unsigned i = 0, e = Insts.size(); i != e; ++i) @@ -272,7 +273,7 @@ GetInstByName(const char *Name, DenseMap<const Record*, CodeGenInstruction*>::const_iterator I = Insts.find(Rec); if (Rec == 0 || I == Insts.end()) - throw std::string("Could not find '") + Name + "' instruction!"; + PrintFatalError(std::string("Could not find '") + Name + "' instruction!"); return I->second; } @@ -425,7 +426,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { if (DefName.size() <= 4 || std::string(DefName.begin(), DefName.begin() + 4) != "int_") - throw "Intrinsic '" + DefName + "' does not start with 'int_'!"; + PrintFatalError("Intrinsic '" + DefName + "' does not start with 'int_'!"); EnumName = std::string(DefName.begin()+4, DefName.end()); @@ -445,7 +446,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { // Verify it starts with "llvm.". 
if (Name.size() <= 5 || std::string(Name.begin(), Name.begin() + 5) != "llvm.") - throw "Intrinsic '" + DefName + "'s name does not start with 'llvm.'!"; + PrintFatalError("Intrinsic '" + DefName + "'s name does not start with 'llvm.'!"); } // If TargetPrefix is specified, make sure that Name starts with @@ -454,8 +455,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { if (Name.size() < 6+TargetPrefix.size() || std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size()) != (TargetPrefix + ".")) - throw "Intrinsic '" + DefName + "' does not start with 'llvm." + - TargetPrefix + ".'!"; + PrintFatalError("Intrinsic '" + DefName + "' does not start with 'llvm." + + TargetPrefix + ".'!"); } // Parse the list of return types. @@ -487,7 +488,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { // Reject invalid types. if (VT == MVT::isVoid) - throw "Intrinsic '" + DefName + " has void in result type list!"; + PrintFatalError("Intrinsic '" + DefName + " has void in result type list!"); IS.RetVTs.push_back(VT); IS.RetTypeDefs.push_back(TyEl); @@ -521,7 +522,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) { // Reject invalid types. if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/) - throw "Intrinsic '" + DefName + " has void in result type list!"; + PrintFatalError("Intrinsic '" + DefName + " has void in result type list!"); IS.ParamVTs.push_back(VT); IS.ParamTypeDefs.push_back(TyEl); diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 672b1406a5..ddeecee36f 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -9,8 +9,8 @@ // // This file defines wrappers for the Target class and related global // functionality. This makes it easier to access the data and provides a single -// place that needs to check it for validity. All of these classes throw -// exceptions on error conditions. +// place that needs to check it for validity. All of these classes abort +// on error conditions. // //===----------------------------------------------------------------------===// diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 70c6fe6000..573f55875e 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -10,6 +10,7 @@ #include "DAGISelMatcher.h" #include "CodeGenDAGPatterns.h" #include "CodeGenRegisters.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -172,15 +173,10 @@ void MatcherGen::InferPossibleTypes() { // diagnostics, which we know are impossible at this point. TreePattern &TP = *CGP.pf_begin()->second; - try { - bool MadeChange = true; - while (MadeChange) - MadeChange = PatWithNoTypes->ApplyTypeConstraints(TP, - true/*Ignore reg constraints*/); - } catch (...) 
{ - errs() << "Type constraint application shouldn't fail!"; - abort(); - } + bool MadeChange = true; + while (MadeChange) + MadeChange = PatWithNoTypes->ApplyTypeConstraints(TP, + true/*Ignore reg constraints*/); } @@ -876,7 +872,7 @@ void MatcherGen::EmitResultOperand(const TreePatternNode *N, if (OpRec->isSubClassOf("SDNodeXForm")) return EmitResultSDNodeXFormAsOperand(N, ResultOps); errs() << "Unknown result node to emit code for: " << *N << '\n'; - throw std::string("Unknown node in result pattern!"); + PrintFatalError("Unknown node in result pattern!"); } void MatcherGen::EmitResultCode() { diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index 826465a516..2d11d2480d 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -117,11 +117,9 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i) RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i); - // FIXME: As long as we are using exceptions, might as well drop this to the - // actual conflict site. if (Tables.hasConflicts()) - throw TGError(Target.getTargetRecord()->getLoc(), - "Primary decode conflict"); + PrintFatalError(Target.getTargetRecord()->getLoc(), + "Primary decode conflict"); Tables.emit(OS); return; diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index 2b3c20a2ce..4101076f33 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include <string> @@ -778,7 +779,7 @@ static void ARMPopulateOperands( errs() << "Operand type: " << rec.getName() << '\n'; errs() << "Operand name: " << operandInfo.Name << '\n'; errs() << "Instruction name: " << inst.TheDef->getName() << '\n'; - throw("Unhandled type in EDEmitter"); + PrintFatalError("Unhandled type in EDEmitter"); } } } diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp index 03e918fa4b..8b1e7f9256 100644 --- a/utils/TableGen/FastISelEmitter.cpp +++ b/utils/TableGen/FastISelEmitter.cpp @@ -549,7 +549,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) { }; if (SimplePatterns[Operands][OpcodeName][VT][RetVT].count(PredicateCheck)) - throw TGError(Pattern.getSrcRecord()->getLoc(), + PrintFatalError(Pattern.getSrcRecord()->getLoc(), "Duplicate record in FastISel table!"); SimplePatterns[Operands][OpcodeName][VT][RetVT][PredicateCheck] = Memo; diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index c53776b9ff..5cabcadabd 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "decoder-emitter" #include "CodeGenTarget.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallString.h" @@ -741,7 +742,7 @@ void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS, switch (*I) { default: - throw "invalid decode table opcode"; + PrintFatalError("invalid decode table opcode"); case MCD::OPC_ExtractField: { ++I; unsigned Start = *I++; diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 8e670e3cbc..48d41d7b96 100644 --- 
a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -19,6 +19,7 @@ #include "TableGenBackends.h" #include "SequenceToOffsetTable.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" #include <algorithm> @@ -343,13 +344,14 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, // Emit all of the target-specific flags... BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags"); - if (!TSF) throw "no TSFlags?"; + if (!TSF) + PrintFatalError("no TSFlags?"); uint64_t Value = 0; for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) { if (BitInit *Bit = dyn_cast<BitInit>(TSF->getBit(i))) Value |= uint64_t(Bit->getValue()) << i; else - throw "Invalid TSFlags bit in " + Inst.TheDef->getName(); + PrintFatalError("Invalid TSFlags bit in " + Inst.TheDef->getName()); } OS << ", 0x"; OS.write_hex(Value); diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index e830a66a33..e1910784a6 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -15,6 +15,7 @@ #include "CodeGenTarget.h" #include "SequenceToOffsetTable.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" #include "llvm/TableGen/TableGenBackend.h" @@ -249,7 +250,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT, if (EVT(VT).isInteger()) { unsigned BitWidth = EVT(VT).getSizeInBits(); switch (BitWidth) { - default: throw "unhandled integer type width in intrinsic!"; + default: PrintFatalError("unhandled integer type width in intrinsic!"); case 1: return Sig.push_back(IIT_I1); case 8: return Sig.push_back(IIT_I8); case 16: return Sig.push_back(IIT_I16); @@ -259,7 +260,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT, } switch (VT) { - default: throw "unhandled MVT in intrinsic!"; + default: PrintFatalError("unhandled MVT in intrinsic!"); case MVT::f32: return Sig.push_back(IIT_F32); case MVT::f64: return Sig.push_back(IIT_F64); case MVT::Metadata: return Sig.push_back(IIT_METADATA); @@ -328,7 +329,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes, if (EVT(VT).isVector()) { EVT VVT = VT; switch (VVT.getVectorNumElements()) { - default: throw "unhandled vector type width in intrinsic!"; + default: PrintFatalError("unhandled vector type width in intrinsic!"); case 2: Sig.push_back(IIT_V2); break; case 4: Sig.push_back(IIT_V4); break; case 8: Sig.push_back(IIT_V8); break; @@ -692,8 +693,8 @@ EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints, if (!BIM.insert(std::make_pair(Ints[i].GCCBuiltinName, Ints[i].EnumName)).second) - throw "Intrinsic '" + Ints[i].TheDef->getName() + - "': duplicate GCC builtin name!"; + PrintFatalError("Intrinsic '" + Ints[i].TheDef->getName() + + "': duplicate GCC builtin name!"); } } diff --git a/utils/TableGen/Makefile b/utils/TableGen/Makefile index bdf0ba01fb..9bfd94b757 100644 --- a/utils/TableGen/Makefile +++ b/utils/TableGen/Makefile @@ -10,7 +10,6 @@ LEVEL = ../.. TOOLNAME = llvm-tblgen USEDLIBS = LLVMTableGen.a LLVMSupport.a -REQUIRES_EH := 1 # This tool has no plugins, optimize startup time. 
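The Makefile change above is the payoff of the whole conversion: with every throw replaced by PrintFatalError, llvm-tblgen no longer needs to be built with exception handling, so REQUIRES_EH is dropped. A minimal model of the contract the code now relies on; the real PrintFatalError is declared in llvm/TableGen/Error.h and has overloads that also print source locations:

#include <cstdio>
#include <cstdlib>
#include <string>

// Print the diagnostic and exit. Unlike a thrown exception this never
// unwinds back into the caller, so no try/catch (and no EH tables) are
// needed anywhere in the tool.
static void PrintFatalError(const std::string &Msg) {
  std::fprintf(stderr, "error: %s\n", Msg.c_str());
  std::exit(1);
}

int main() {
  PrintFatalError("no TSFlags?");  // message borrowed from the hunk above
  return 0;                        // never reached
}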
TOOL_NO_EXPORTS = 1 diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp index b0241c7c27..64aaee756b 100644 --- a/utils/TableGen/PseudoLoweringEmitter.cpp +++ b/utils/TableGen/PseudoLoweringEmitter.cpp @@ -90,7 +90,7 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn, // FIXME: We probably shouldn't ever get a non-zero BaseIdx here. assert(BaseIdx == 0 && "Named subargument in pseudo expansion?!"); if (DI->getDef() != Insn.Operands[BaseIdx + i].Rec) - throw TGError(Rec->getLoc(), + PrintFatalError(Rec->getLoc(), "Pseudo operand type '" + DI->getDef()->getName() + "' does not match expansion operand type '" + Insn.Operands[BaseIdx + i].Rec->getName() + "'"); @@ -129,22 +129,22 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) { DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator()); if (!OpDef) - throw TGError(Rec->getLoc(), Rec->getName() + + PrintFatalError(Rec->getLoc(), Rec->getName() + " has unexpected operator type!"); Record *Operator = OpDef->getDef(); if (!Operator->isSubClassOf("Instruction")) - throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + - "' is not an instruction!"); + PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + + "' is not an instruction!"); CodeGenInstruction Insn(Operator); if (Insn.isCodeGenOnly || Insn.isPseudo) - throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + - "' cannot be another pseudo instruction!"); + PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + + "' cannot be another pseudo instruction!"); if (Insn.Operands.size() != Dag->getNumArgs()) - throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + - "' operand count mismatch"); + PrintFatalError(Rec->getLoc(), "Pseudo result '" + Operator->getName() + + "' operand count mismatch"); unsigned NumMIOperands = 0; for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i) @@ -179,9 +179,9 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) { StringMap<unsigned>::iterator SourceOp = SourceOperands.find(Dag->getArgName(i)); if (SourceOp == SourceOperands.end()) - throw TGError(Rec->getLoc(), - "Pseudo output operand '" + Dag->getArgName(i) + - "' has no matching source operand."); + PrintFatalError(Rec->getLoc(), + "Pseudo output operand '" + Dag->getArgName(i) + + "' has no matching source operand."); // Map the source operand to the destination operand index for each // MachineInstr operand. for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I) diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index ad1dab4ac0..95b6267238 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -62,6 +62,8 @@ private: void EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank, const std::string &ClassName); + void emitComposeSubRegIndices(raw_ostream &OS, CodeGenRegBank &RegBank, + const std::string &ClassName); }; } // End anonymous namespace @@ -530,6 +532,102 @@ static void printDiff16(raw_ostream &OS, uint16_t Val) { OS << Val; } +// Try to combine Idx's compose map into Vec if it is compatible. +// Return false if it's not possible. 
+static bool combine(const CodeGenSubRegIndex *Idx, + SmallVectorImpl<CodeGenSubRegIndex*> &Vec) { + const CodeGenSubRegIndex::CompMap &Map = Idx->getComposites(); + for (CodeGenSubRegIndex::CompMap::const_iterator + I = Map.begin(), E = Map.end(); I != E; ++I) { + CodeGenSubRegIndex *&Entry = Vec[I->first->EnumValue - 1]; + if (Entry && Entry != I->second) + return false; + } + + // All entries are compatible. Make it so. + for (CodeGenSubRegIndex::CompMap::const_iterator + I = Map.begin(), E = Map.end(); I != E; ++I) + Vec[I->first->EnumValue - 1] = I->second; + return true; +} + +static const char *getMinimalTypeForRange(uint64_t Range) { + assert(Range < 0xFFFFFFFFULL && "Enum too large"); + if (Range > 0xFFFF) + return "uint32_t"; + if (Range > 0xFF) + return "uint16_t"; + return "uint8_t"; +} + +void +RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS, + CodeGenRegBank &RegBank, + const std::string &ClName) { + ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices(); + OS << "unsigned " << ClName + << "::composeSubRegIndicesImpl(unsigned IdxA, unsigned IdxB) const {\n"; + + // Many sub-register indexes are composition-compatible, meaning that + // + // compose(IdxA, IdxB) == compose(IdxA', IdxB) + // + // for many IdxA, IdxA' pairs. Not all sub-register indexes can be composed. + // The illegal entries can be used as wildcards to compress the table further. + + // Map each Sub-register index to a compatible table row. + SmallVector<unsigned, 4> RowMap; + SmallVector<SmallVector<CodeGenSubRegIndex*, 4>, 4> Rows; + + for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) { + unsigned Found = ~0u; + for (unsigned r = 0, re = Rows.size(); r != re; ++r) { + if (combine(SubRegIndices[i], Rows[r])) { + Found = r; + break; + } + } + if (Found == ~0u) { + Found = Rows.size(); + Rows.resize(Found + 1); + Rows.back().resize(SubRegIndices.size()); + combine(SubRegIndices[i], Rows.back()); + } + RowMap.push_back(Found); + } + + // Output the row map if there are multiple rows. + if (Rows.size() > 1) { + OS << " static const " << getMinimalTypeForRange(Rows.size()) + << " RowMap[" << SubRegIndices.size() << "] = {\n "; + for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) + OS << RowMap[i] << ", "; + OS << "\n };\n"; + } + + // Output the rows. + OS << " static const " << getMinimalTypeForRange(SubRegIndices.size()+1) + << " Rows[" << Rows.size() << "][" << SubRegIndices.size() << "] = {\n"; + for (unsigned r = 0, re = Rows.size(); r != re; ++r) { + OS << " { "; + for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) + if (Rows[r][i]) + OS << Rows[r][i]->EnumValue << ", "; + else + OS << "0, "; + OS << "},\n"; + } + OS << " };\n\n"; + + OS << " --IdxA; assert(IdxA < " << SubRegIndices.size() << ");\n" + << " --IdxB; assert(IdxB < " << SubRegIndices.size() << ");\n"; + if (Rows.size() > 1) + OS << " return Rows[RowMap[IdxA]][IdxB];\n"; + else + OS << " return Rows[0][IdxB];\n"; + OS << "}\n\n"; +} + // // runMCDesc - Print out MC register descriptions.
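A worked example of the row-sharing trick in emitComposeSubRegIndices above: each index's composition map becomes a candidate table row, a zero entry means "illegal composition" and acts as a wildcard, and combine() merges a map into an existing row only when the two agree on every defined entry. The maps below are invented for illustration; here three of them collapse into a single shared row:

#include <cstdio>
#include <vector>

typedef std::vector<unsigned> Row;  // 0 = illegal composition (wildcard)

// Merge Map into Vec if they never disagree on a defined entry; mirrors
// combine() in the patch, with plain integers for sub-register indices.
static bool combine(const Row &Map, Row &Vec) {
  for (unsigned i = 0, e = Map.size(); i != e; ++i)
    if (Map[i] && Vec[i] && Map[i] != Vec[i])
      return false;                 // real conflict: both defined, different
  for (unsigned i = 0, e = Map.size(); i != e; ++i)
    if (Map[i])
      Vec[i] = Map[i];              // fill the wildcards in place
  return true;
}

int main() {
  // Hypothetical composition maps for three sub-register indices.
  Row Maps[3] = {Row(3), Row(3), Row(3)};
  Maps[0][0] = 1;                                  // {1,0,0}
  Maps[1][1] = 2;                                  // {0,2,0}
  Maps[2][0] = 1; Maps[2][1] = 2; Maps[2][2] = 3;  // {1,2,3}

  std::vector<Row> Rows;
  std::vector<unsigned> RowMap;
  for (unsigned i = 0; i != 3; ++i) {
    unsigned Found = ~0u;
    for (unsigned r = 0, re = Rows.size(); r != re; ++r)
      if (combine(Maps[i], Rows[r])) { Found = r; break; }
    if (Found == ~0u) {             // no compatible row: start a new one
      Found = Rows.size();
      Rows.push_back(Row(3));
      combine(Maps[i], Rows.back());
    }
    RowMap.push_back(Found);
  }
  // All three maps fit one row {1,2,3}, so RowMap = {0,0,0} and the
  // emitted table needs a single row instead of three.
  std::printf("%u rows for 3 indices\n", (unsigned)Rows.size());
  return 0;
}

getMinimalTypeForRange then shrinks each emitted array's element type (uint8_t, uint16_t, or uint32_t) to whatever the row and index counts actually require.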
// @@ -802,7 +900,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target, << " virtual bool needsStackRealignment(const MachineFunction &) const\n" << " { return false; }\n"; if (!RegBank.getSubRegIndices().empty()) { - OS << " virtual unsigned composeSubRegIndices(unsigned, unsigned) const;\n" + OS << " virtual unsigned composeSubRegIndicesImpl" + << "(unsigned, unsigned) const;\n" << " virtual const TargetRegisterClass *" "getSubClassWithSubReg(const TargetRegisterClass*, unsigned) const;\n"; } @@ -1054,31 +1153,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, std::string ClassName = Target.getName() + "GenRegisterInfo"; - // Emit composeSubRegIndices - if (!SubRegIndices.empty()) { - OS << "unsigned " << ClassName - << "::composeSubRegIndices(unsigned IdxA, unsigned IdxB) const {\n" - << " switch (IdxA) {\n" - << " default:\n return IdxB;\n"; - for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) { - bool Open = false; - for (unsigned j = 0; j != e; ++j) { - CodeGenSubRegIndex *Comp = SubRegIndices[i]->compose(SubRegIndices[j]); - if (Comp && Comp != SubRegIndices[j]) { - if (!Open) { - OS << " case " << SubRegIndices[i]->getQualifiedName() - << ": switch(IdxB) {\n default: return IdxB;\n"; - Open = true; - } - OS << " case " << SubRegIndices[j]->getQualifiedName() - << ": return " << Comp->getQualifiedName() << ";\n"; - } - } - if (Open) - OS << " }\n"; - } - OS << " }\n}\n\n"; - } + if (!SubRegIndices.empty()) + emitComposeSubRegIndices(OS, RegBank, ClassName); // Emit getSubClassWithSubReg. if (!SubRegIndices.empty()) { @@ -1092,7 +1168,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, else if (RegisterClasses.size() < UINT16_MAX) OS << " static const uint16_t Table["; else - throw "Too many register classes."; + PrintFatalError("Too many register classes."); OS << RegisterClasses.size() << "][" << SubRegIndices.size() << "] = {\n"; for (unsigned rci = 0, rce = RegisterClasses.size(); rci != rce; ++rci) { const CodeGenRegisterClass &RC = *RegisterClasses[rci]; diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp index 33a8f0e337..0dd9853843 100644 --- a/utils/TableGen/SetTheory.cpp +++ b/utils/TableGen/SetTheory.cpp @@ -36,7 +36,7 @@ struct AddOp : public SetTheory::Operator { struct SubOp : public SetTheory::Operator { void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() < 2) - throw TGError(Loc, "Set difference needs at least two arguments: " + + PrintFatalError(Loc, "Set difference needs at least two arguments: " + Expr->getAsString()); RecSet Add, Sub; ST.evaluate(*Expr->arg_begin(), Add, Loc); @@ -51,7 +51,7 @@ struct SubOp : public SetTheory::Operator { struct AndOp : public SetTheory::Operator { void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() != 2) - throw TGError(Loc, "Set intersection requires two arguments: " + + PrintFatalError(Loc, "Set intersection requires two arguments: " + Expr->getAsString()); RecSet S1, S2; ST.evaluate(Expr->arg_begin()[0], S1, Loc); @@ -70,13 +70,13 @@ struct SetIntBinOp : public SetTheory::Operator { void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { if (Expr->arg_size() != 2) - throw TGError(Loc, "Operator requires (Op Set, Int) arguments: " + + PrintFatalError(Loc, "Operator requires (Op Set, Int) arguments: " + Expr->getAsString()); RecSet Set; ST.evaluate(Expr->arg_begin()[0], Set, Loc); IntInit *II = 
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index 33a8f0e337..0dd9853843 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -36,7 +36,7 @@ struct AddOp : public SetTheory::Operator {
 struct SubOp : public SetTheory::Operator {
   void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() < 2)
-      throw TGError(Loc, "Set difference needs at least two arguments: " +
+      PrintFatalError(Loc, "Set difference needs at least two arguments: " +
                     Expr->getAsString());
     RecSet Add, Sub;
     ST.evaluate(*Expr->arg_begin(), Add, Loc);
@@ -51,7 +51,7 @@ struct SubOp : public SetTheory::Operator {
 struct AndOp : public SetTheory::Operator {
   void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() != 2)
-      throw TGError(Loc, "Set intersection requires two arguments: " +
+      PrintFatalError(Loc, "Set intersection requires two arguments: " +
                     Expr->getAsString());
     RecSet S1, S2;
     ST.evaluate(Expr->arg_begin()[0], S1, Loc);
@@ -70,13 +70,13 @@ struct SetIntBinOp : public SetTheory::Operator {
   void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (Expr->arg_size() != 2)
-      throw TGError(Loc, "Operator requires (Op Set, Int) arguments: " +
+      PrintFatalError(Loc, "Operator requires (Op Set, Int) arguments: " +
                     Expr->getAsString());
     RecSet Set;
     ST.evaluate(Expr->arg_begin()[0], Set, Loc);
     IntInit *II =
       dyn_cast<IntInit>(Expr->arg_begin()[1]);
     if (!II)
-      throw TGError(Loc, "Second argument must be an integer: " +
+      PrintFatalError(Loc, "Second argument must be an integer: " +
                     Expr->getAsString());
     apply2(ST, Expr, Set, II->getValue(), Elts, Loc);
   }
@@ -88,7 +88,7 @@ struct ShlOp : public SetIntBinOp {
               RecSet &Set, int64_t N,
               RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N < 0)
-      throw TGError(Loc, "Positive shift required: " +
+      PrintFatalError(Loc, "Positive shift required: " +
                     Expr->getAsString());
     if (unsigned(N) < Set.size())
       Elts.insert(Set.begin() + N, Set.end());
@@ -101,7 +101,7 @@ struct TruncOp : public SetIntBinOp {
               RecSet &Set, int64_t N,
               RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N < 0)
-      throw TGError(Loc, "Positive length required: " +
+      PrintFatalError(Loc, "Positive length required: " +
                     Expr->getAsString());
     if (unsigned(N) > Set.size())
       N = Set.size();
@@ -138,7 +138,7 @@ struct DecimateOp : public SetIntBinOp {
               RecSet &Set, int64_t N,
               RecSet &Elts, ArrayRef<SMLoc> Loc) {
     if (N <= 0)
-      throw TGError(Loc, "Positive stride required: " +
+      PrintFatalError(Loc, "Positive stride required: " +
                     Expr->getAsString());
     for (unsigned I = 0; I < Set.size(); I += N)
       Elts.insert(Set[I]);
@@ -168,35 +168,36 @@ struct SequenceOp : public SetTheory::Operator {
   void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
     int Step = 1;
     if (Expr->arg_size() > 4)
-      throw TGError(Loc, "Bad args to (sequence \"Format\", From, To): " +
+      PrintFatalError(Loc, "Bad args to (sequence \"Format\", From, To): " +
                     Expr->getAsString());
     else if (Expr->arg_size() == 4) {
       if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) {
         Step = II->getValue();
       } else
-        throw TGError(Loc, "Stride must be an integer: " + Expr->getAsString());
+        PrintFatalError(Loc, "Stride must be an integer: " +
+                        Expr->getAsString());
     }
 
     std::string Format;
     if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0]))
       Format = SI->getValue();
     else
-      throw TGError(Loc, "Format must be a string: " + Expr->getAsString());
+      PrintFatalError(Loc, "Format must be a string: " + Expr->getAsString());
 
     int64_t From, To;
     if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]))
       From = II->getValue();
     else
-      throw TGError(Loc, "From must be an integer: " + Expr->getAsString());
+      PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
     if (From < 0 || From >= (1 << 30))
-      throw TGError(Loc, "From out of range");
+      PrintFatalError(Loc, "From out of range");
 
     if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
       To = II->getValue();
     else
-      throw TGError(Loc, "From must be an integer: " + Expr->getAsString());
+      PrintFatalError(Loc, "To must be an integer: " + Expr->getAsString());
     if (To < 0 || To >= (1 << 30))
-      throw TGError(Loc, "To out of range");
+      PrintFatalError(Loc, "To out of range");
 
     RecordKeeper &Records =
       cast<DefInit>(Expr->getOperator())->getDef()->getRecords();
@@ -212,7 +213,7 @@ struct SequenceOp : public SetTheory::Operator {
       OS << format(Format.c_str(), unsigned(From));
       Record *Rec = Records.getDef(OS.str());
       if (!Rec)
-        throw TGError(Loc, "No def named '" + Name + "': " +
+        PrintFatalError(Loc, "No def named '" + Name + "': " +
                       Expr->getAsString());
       // Try to reevaluate Rec in case it is a set.
       if (const RecVec *Result = ST.expand(Rec))
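The sequence operator's expansion loop (the format/getDef calls in the hunk above) can be pictured with a small stand-alone sketch. This is an editorial illustration, not the TableGen API; it assumes a positive step, and the real operator additionally range-checks From and To and requires each generated name to resolve to an existing record:

    #include <cstdio>

    int main() {
      // Stand-ins for the dag arguments of (sequence "R%u", 0, 6, 2).
      const char *Format = "R%u";
      int From = 0, To = 6, Step = 2;
      for (int I = From; I <= To; I += Step) {
        char Name[32];
        std::snprintf(Name, sizeof(Name), Format, unsigned(I));
        std::printf("%s ", Name); // prints: R0 R2 R4 R6
      }
      std::printf("\n");
      return 0;
    }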
@@ -282,13 +283,13 @@ void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
 
   // Anything else must be a DAG.
   DagInit *DagExpr = dyn_cast<DagInit>(Expr);
   if (!DagExpr)
-    throw TGError(Loc, "Invalid set element: " + Expr->getAsString());
+    PrintFatalError(Loc, "Invalid set element: " + Expr->getAsString());
   DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
   if (!OpInit)
-    throw TGError(Loc, "Bad set expression: " + Expr->getAsString());
+    PrintFatalError(Loc, "Bad set expression: " + Expr->getAsString());
   Operator *Op = Operators.lookup(OpInit->getDef()->getName());
   if (!Op)
-    throw TGError(Loc, "Unknown set operator: " + Expr->getAsString());
+    PrintFatalError(Loc, "Unknown set operator: " + Expr->getAsString());
   Op->apply(*this, DagExpr, Elts, Loc);
 }
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index ac833a54a7..f1a06bb528 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -675,7 +675,7 @@ Record *SubtargetEmitter::FindWriteResources(
       continue;
     }
     if (AliasDef)
-      throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases "
+      PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases "
                     "defined for processor " + ProcModel.ModelName +
                     " Ensure only one SchedAlias exists per RW.");
     AliasDef = AliasRW.TheDef;
@@ -692,7 +692,7 @@ Record *SubtargetEmitter::FindWriteResources(
     if (AliasDef == (*WRI)->getValueAsDef("WriteType") ||
         SchedWrite.TheDef == (*WRI)->getValueAsDef("WriteType")) {
       if (ResDef) {
-        throw TGError((*WRI)->getLoc(), "Resources are defined for both "
+        PrintFatalError((*WRI)->getLoc(), "Resources are defined for both "
                       "SchedWrite and its alias on processor " +
                       ProcModel.ModelName);
       }
@@ -702,7 +702,7 @@ Record *SubtargetEmitter::FindWriteResources(
   // TODO: If ProcModel has a base model (previous generation processor),
   // then call FindWriteResources recursively with that model here.
   if (!ResDef) {
-    throw TGError(ProcModel.ModelDef->getLoc(),
+    PrintFatalError(ProcModel.ModelDef->getLoc(),
                   std::string("Processor does not define resources for ") +
                   SchedWrite.TheDef->getName());
   }
@@ -729,7 +729,7 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
       continue;
     }
     if (AliasDef)
-      throw TGError(AliasRW.TheDef->getLoc(), "Multiple aliases "
+      PrintFatalError(AliasRW.TheDef->getLoc(), "Multiple aliases "
                     "defined for processor " + ProcModel.ModelName +
                     " Ensure only one SchedAlias exists per RW.");
     AliasDef = AliasRW.TheDef;
@@ -746,7 +746,7 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
     if (AliasDef == (*RAI)->getValueAsDef("ReadType") ||
         SchedRead.TheDef == (*RAI)->getValueAsDef("ReadType")) {
       if (ResDef) {
-        throw TGError((*RAI)->getLoc(), "Resources are defined for both "
+        PrintFatalError((*RAI)->getLoc(), "Resources are defined for both "
                       "SchedRead and its alias on processor " +
                       ProcModel.ModelName);
       }
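The common thread in these hunks is the move from C++ exceptions to fatal diagnostics. Below is a rough, editorial stand-in for the control flow only; the real PrintFatalError (declared in llvm/TableGen/Error.h) also reports the SMLoc source location before exiting, which this sketch omits:

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Stand-in for the pattern, not LLVM's implementation: print the
    // message and terminate instead of throwing for callers to catch.
    static void printFatalError(const std::string &Msg) {
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
      std::exit(1);
    }

    int main() {
      int ArgCount = 1; // pretend a (decimate Set, N) dag arrived short
      if (ArgCount != 2)
        printFatalError("Operator requires (Op Set, Int) arguments");
      return 0;
    }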
@@ -756,7 +756,7 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
   // TODO: If ProcModel has a base model (previous generation processor),
   // then call FindReadAdvance recursively with that model here.
   if (!ResDef && SchedRead.TheDef->getName() != "ReadDefault") {
-    throw TGError(ProcModel.ModelDef->getLoc(),
+    PrintFatalError(ProcModel.ModelDef->getLoc(),
                   std::string("Processor does not define resources for ") +
                   SchedRead.TheDef->getName());
   }
@@ -1053,7 +1053,7 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
       continue;
 
     std::vector<MCSchedClassDesc> &SCTab =
-      SchedTables.ProcSchedClasses[1 + PI - SchedModels.procModelBegin()];
+      SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];
 
     OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"
        << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";
@@ -1098,7 +1098,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     if (PI->hasInstrSchedModel())
       EmitProcessorResources(*PI, OS);
     else if(!PI->ProcResourceDefs.empty())
-      throw TGError(PI->ModelDef->getLoc(), "SchedMachineModel defines "
+      PrintFatalError(PI->ModelDef->getLoc(), "SchedMachineModel defines "
                     "ProcResources without defining WriteRes SchedWriteRes");
 
     // Begin processor itinerary properties
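One non-mechanical change above is the parenthesization in EmitSchedClassTables. Both groupings produce the same index for random-access iterators; the parentheses make the intent explicit (an integer offset rather than an advanced iterator). A tiny editorial check, with pointers standing in for the iterators:

    #include <cassert>

    int main() {
      int Models[4] = {10, 20, 30, 40};
      const int *Begin = Models;  // stand-in for procModelBegin()
      const int *PI = Models + 2; // stand-in for the loop iterator
      assert(1 + PI - Begin == 1 + (PI - Begin)); // both evaluate to 3
      return 0;
    }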