Diffstat (limited to 'lib/Target/ARM')
32 files changed, 8695 insertions, 1968 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index db270739ea..1d626d1c88 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -20,43 +20,77 @@ #include <cassert> namespace llvm { - // Enums corresponding to ARM condition codes - namespace ARMCC { - enum CondCodes { - EQ, - NE, - CS, - CC, - MI, - PL, - VS, - VC, - HI, - LS, - GE, - LT, - GT, - LE, - AL - }; + +class ARMTargetMachine; +class FunctionPass; + +// Enums corresponding to ARM condition codes +namespace ARMCC { + enum CondCodes { + EQ, + NE, + HS, + LO, + MI, + PL, + VS, + VC, + HI, + LS, + GE, + LT, + GT, + LE, + AL + }; + + inline static CondCodes getOppositeCondition(CondCodes CC){ + switch (CC) { + default: assert(0 && "Unknown condition code"); + case EQ: return NE; + case NE: return EQ; + case HS: return LO; + case LO: return HS; + case MI: return PL; + case PL: return MI; + case VS: return VC; + case VC: return VS; + case HI: return LS; + case LS: return HI; + case GE: return LT; + case LT: return GE; + case GT: return LE; + case LE: return GT; + } } +} - namespace ARMShift { - enum ShiftTypes { - LSL, - LSR, - ASR, - ROR, - RRX - }; +inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { + switch (CC) { + default: assert(0 && "Unknown condition code"); + case ARMCC::EQ: return "eq"; + case ARMCC::NE: return "ne"; + case ARMCC::HS: return "hs"; + case ARMCC::LO: return "lo"; + case ARMCC::MI: return "mi"; + case ARMCC::PL: return "pl"; + case ARMCC::VS: return "vs"; + case ARMCC::VC: return "vc"; + case ARMCC::HI: return "hi"; + case ARMCC::LS: return "ls"; + case ARMCC::GE: return "ge"; + case ARMCC::LT: return "lt"; + case ARMCC::GT: return "gt"; + case ARMCC::LE: return "le"; + case ARMCC::AL: return "al"; } +} - class FunctionPass; - class TargetMachine; +FunctionPass *createARMISelDag(ARMTargetMachine &TM); +FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM); +FunctionPass *createARMLoadStoreOptimizationPass(); +FunctionPass *createARMConstantIslandPass(); - FunctionPass *createARMISelDag(TargetMachine &TM); - FunctionPass *createARMCodePrinterPass(std::ostream &OS, TargetMachine &TM); - FunctionPass *createARMFixMulPass(); } // end namespace llvm; // Defines symbolic names for ARM registers. This defines a mapping from diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 40153493d8..6faf938fca 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -18,6 +18,73 @@ include "../Target.td" //===----------------------------------------------------------------------===// +// ARM Subtarget features. +// + +def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true", + "Enable VFP2 instructions ">; + +//===----------------------------------------------------------------------===// +// ARM Processors supported. +// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +// V4 Processors. +def : Proc<"generic", []>; +def : Proc<"arm8", []>; +def : Proc<"arm810", []>; +def : Proc<"strongarm", []>; +def : Proc<"strongarm110", []>; +def : Proc<"strongarm1100", []>; +def : Proc<"strongarm1110", []>; + +// V4T Processors. 
+def : Proc<"arm7tdmi", [ArchV4T]>; +def : Proc<"arm7tdmi-s", [ArchV4T]>; +def : Proc<"arm710t", [ArchV4T]>; +def : Proc<"arm720t", [ArchV4T]>; +def : Proc<"arm9", [ArchV4T]>; +def : Proc<"arm9tdmi", [ArchV4T]>; +def : Proc<"arm920", [ArchV4T]>; +def : Proc<"arm920t", [ArchV4T]>; +def : Proc<"arm922t", [ArchV4T]>; +def : Proc<"arm940t", [ArchV4T]>; +def : Proc<"ep9312", [ArchV4T]>; + +// V5T Processors. +def : Proc<"arm10tdmi", [ArchV5T]>; +def : Proc<"arm1020t", [ArchV5T]>; + +// V5TE Processors. +def : Proc<"arm9e", [ArchV5TE]>; +def : Proc<"arm946e-s", [ArchV5TE]>; +def : Proc<"arm966e-s", [ArchV5TE]>; +def : Proc<"arm968e-s", [ArchV5TE]>; +def : Proc<"arm10e", [ArchV5TE]>; +def : Proc<"arm1020e", [ArchV5TE]>; +def : Proc<"arm1022e", [ArchV5TE]>; +def : Proc<"xscale", [ArchV5TE]>; +def : Proc<"iwmmxt", [ArchV5TE]>; + +// V6 Processors. +def : Proc<"arm1136j-s", [ArchV6]>; +def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"arm1176jz-s", [ArchV6]>; +def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"mpcorenovfp", [ArchV6]>; +def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -31,8 +98,14 @@ include "ARMInstrInfo.td" def ARMInstrInfo : InstrInfo { // Define how we want to layout our target-specific information field. - let TSFlagsFields = []; - let TSFlagsShifts = []; + let TSFlagsFields = ["AddrModeBits", + "SizeFlag", + "IndexModeBits", + "Opcode"]; + let TSFlagsShifts = [0, + 4, + 7, + 9]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h new file mode 100644 index 0000000000..3f47a69471 --- /dev/null +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -0,0 +1,394 @@ +//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM addressing mode implementation stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +namespace llvm { + +/// ARM_AM - ARM Addressing Mode Stuff +namespace ARM_AM { + enum ShiftOpc { + no_shift = 0, + asr, + lsl, + lsr, + ror, + rrx + }; + + enum AddrOpc { + add = '+', sub = '-' + }; + + static inline const char *getShiftOpcStr(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return "asr"; + case ARM_AM::lsl: return "lsl"; + case ARM_AM::lsr: return "lsr"; + case ARM_AM::ror: return "ror"; + case ARM_AM::rrx: return "rrx"; + } + } + + static inline ShiftOpc getShiftOpcForNode(SDOperand N) { + switch (N.getOpcode()) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. 
+      // Can't handle RRX here, because it would require folding a flag into
+      // the addressing mode. :( This causes us to miss certain things.
+      //case ARMISD::RRX: return ARM_AM::rrx;
+    }
+  }
+
+  enum AMSubMode {
+    bad_am_submode = 0,
+    ia,
+    ib,
+    da,
+    db
+  };
+
+  static inline const char *getAMSubModeStr(AMSubMode Mode) {
+    switch (Mode) {
+    default: assert(0 && "Unknown addressing sub-mode!");
+    case ARM_AM::ia: return "ia";
+    case ARM_AM::ib: return "ib";
+    case ARM_AM::da: return "da";
+    case ARM_AM::db: return "db";
+    }
+  }
+
+  static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+    switch (Mode) {
+    default: assert(0 && "Unknown addressing sub-mode!");
+    case ARM_AM::ia: return isLD ? "fd" : "ea";
+    case ARM_AM::ib: return isLD ? "ed" : "fa";
+    case ARM_AM::da: return isLD ? "fa" : "ed";
+    case ARM_AM::db: return isLD ? "ea" : "fd";
+    }
+  }
+
+  /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+  ///
+  static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+    assert(Amt < 32 && "Invalid rotate amount");
+    return (Val >> Amt) | (Val << ((32-Amt)&31));
+  }
+
+  /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+  ///
+  static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+    assert(Amt < 32 && "Invalid rotate amount");
+    return (Val << Amt) | (Val >> ((32-Amt)&31));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #1: shift_operand with registers
+  //===--------------------------------------------------------------------===//
+  //
+  // This 'addressing mode' is used for arithmetic instructions. It can
+  // represent things like:
+  //   reg
+  //   reg [asr|lsl|lsr|ror|rrx] reg
+  //   reg [asr|lsl|lsr|ror|rrx] imm
+  //
+  // This is stored as three operands [rega, regb, opc]. The first is the base
+  // reg, the second is the shift amount register (reg0 if the shift amount is
+  // an immediate or not present). The third operand encodes the shift opcode
+  // and the imm if a reg isn't present.
+  //
+  static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+    return ShOp | (Imm << 3);
+  }
+  static inline unsigned getSORegOffset(unsigned Op) {
+    return Op >> 3;
+  }
+  static inline ShiftOpc getSORegShOp(unsigned Op) {
+    return (ShiftOpc)(Op & 7);
+  }
+
+  /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+  /// the 8-bit imm value.
+  static inline unsigned getSOImmValImm(unsigned Imm) {
+    return Imm & 0xFF;
+  }
+  /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+  /// the rotate amount.
+  static inline unsigned getSOImmValRot(unsigned Imm) {
+    return (Imm >> 8) * 2;
+  }
+
+  /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+  /// computing the rotate amount to use. If this immediate value cannot be
+  /// handled with a single shifter-op, determine a good rotate amount that will
+  /// take a maximal chunk of bits out of the immediate.
+  static inline unsigned getSOImmValRotate(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+    // of zero.
+    if ((Imm & ~255U) == 0) return 0;
+
+    // Use CTZ to compute the rotate amount.
+    unsigned TZ = CountTrailingZeros_32(Imm);
+
+    // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+    // not 9.
+    unsigned RotAmt = TZ & ~1;
+
+    // If we can handle this spread, return it.
+    if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+      return (32-RotAmt)&31;  // HW rotates right, not left.
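+    // Worked example (illustrative, not part of the original patch): for
+    // Imm = 0x200, TZ = 9 and RotAmt = 8; rotr32(0x200, 8) == 0x2 fits in
+    // 8 bits, so the hardware rebuilds the value by rotating 0x2 right by
+    // (32-8)&31 == 24, and getSOImmVal(0x200) below packs this as
+    // 0x2 | ((24>>1) << 8) == 0xC02.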
+
+    // For values like 0xF000000F, we should skip the first run of ones, then
+    // retry the hunt.
+    if (Imm & 1) {
+      unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+      if (TrailingOnes != 32) {  // Avoid overflow on 0xFFFFFFFF
+        // Restart the search for a high-order bit after the initial sequence
+        // of ones.
+        unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
+
+        // Rotate amount must be even.
+        unsigned RotAmt2 = TZ2 & ~1;
+
+        // If this fits, use it.
+        if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+          return (32-RotAmt2)&31;  // HW rotates right, not left.
+      }
+    }
+
+    // Otherwise, we have no way to cover this span of bits with a single
+    // shifter_op immediate. Return a chunk of bits that will be useful to
+    // handle.
+    return (32-RotAmt)&31;  // HW rotates right, not left.
+  }
+
+  /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+  /// into a shifter_operand immediate operand, return the 12-bit encoding for
+  /// it. If not, return -1.
+  static inline int getSOImmVal(unsigned Arg) {
+    // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+    // of zero.
+    if ((Arg & ~255U) == 0) return Arg;
+
+    unsigned RotAmt = getSOImmValRotate(Arg);
+
+    // If this cannot be handled with a single shifter_op, bail out.
+    if (rotr32(~255U, RotAmt) & Arg)
+      return -1;
+
+    // Encode this correctly.
+    return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+  }
+
+  /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+  /// or'ing together two SOImmVal's.
+  static inline bool isSOImmTwoPartVal(unsigned V) {
+    // If this can be handled with a single shifter_op, bail out.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+    if (V == 0)
+      return false;
+
+    // If this can be handled with two shifter_op's, accept.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+    return V == 0;
+  }
+
+  /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+  /// return the first chunk of it.
+  static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+    return rotr32(255U, getSOImmValRotate(V)) & V;
+  }
+
+  /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+  /// return the second chunk of it.
+  static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+    // Mask out the first chunk.
+    V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+    // Take what's left.
+    assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+    return V;
+  }
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+  /// by a left shift. Returns the shift amount to use.
+  static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially immediate operands with a shift
+    // of zero.
+    if ((Imm & ~255U) == 0) return 0;
+
+    // Use CTZ to compute the shift amount.
+    return CountTrailingZeros_32(Imm);
+  }
+
+  /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+  static inline bool isThumbImmShiftedVal(unsigned V) {
+    // Mask off the shifted 8-bit immediate; if nothing remains, V can be
+    // materialized this way.
+    V = (~255U << getThumbImmValShift(V)) & V;
+    return V == 0;
+  }
+
+  /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+  static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+    return V >> getThumbImmValShift(V);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #2
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for most simple load/store instructions.
+  //
+  // addrmode2 := reg +/- reg shop imm
+  // addrmode2 := reg +/- imm12
+  //
+  // The first operand is always a Reg. The second operand is a reg if in
+  // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 12, the immediate in bits 0-11, and the shift op in bits 13-15.
+  //
+  // If this addressing mode is a frame index (before prolog/epilog insertion
+  // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+  // with no shift amount for the frame offset.
+  //
+  static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+    assert(Imm12 < (1 << 12) && "Imm too large!");
+    bool isSub = Opc == sub;
+    return Imm12 | ((int)isSub << 12) | (SO << 13);
+  }
+  static inline unsigned getAM2Offset(unsigned AM2Opc) {
+    return AM2Opc & ((1 << 12)-1);
+  }
+  static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+    return ((AM2Opc >> 12) & 1) ? sub : add;
+  }
+  static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+    return (ShiftOpc)(AM2Opc >> 13);
+  }
+
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #3
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for sign-extending loads and load/store-pair instructions.
+  //
+  // addrmode3 := reg +/- reg
+  // addrmode3 := reg +/- imm8
+  //
+  // The first operand is always a Reg. The second operand is a reg if in
+  // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 8, the immediate in bits 0-7.
+
+  /// getAM3Opc - This function encodes the addrmode3 opc field.
+  static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+    bool isSub = Opc == sub;
+    return ((int)isSub << 8) | Offset;
+  }
+  static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+    return AM3Opc & 0xFF;
+  }
+  static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+    return ((AM3Opc >> 8) & 1) ? sub : add;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #4
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for load / store multiple instructions.
+  //
+  // addrmode4 := reg, <mode>
+  //
+  // The four modes are:
+  //    IA - Increment after
+  //    IB - Increment before
+  //    DA - Decrement after
+  //    DB - Decrement before
+  //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+  // the memory transfer.
+
+  static inline AMSubMode getAM4SubMode(unsigned Mode) {
+    return (AMSubMode)(Mode & 0x7);
+  }
+
+  static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+    return (int)SubMode | ((int)WB << 3);
+  }
+
+  static inline bool getAM4WBFlag(unsigned Mode) {
+    return (Mode >> 3) & 1;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #5
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for coprocessor instructions, such as FP load/stores.
+  //
+  // addrmode5 := reg +/- imm8*4
+  //
+  // The first operand is always a Reg. The second operand encodes the
+  // operation in bit 8 and the immediate in bits 0-7.
+  //
+  // This can also be used for FP load/store multiple ops. The second operand
+  // then encodes the writeback mode in bit 8 and the number of registers (or
+  // 2 times the number of registers for DPR ops) in bits 0-7. In addition,
+  // bits 9-11 encode one of the following two sub-modes:
+  //
+  //    IA - Increment after
+  //    DB - Decrement before
+
+  /// getAM5Opc - This function encodes the addrmode5 opc field.
+  static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+    bool isSub = Opc == sub;
+    return ((int)isSub << 8) | Offset;
+  }
+  static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+    return AM5Opc & 0xFF;
+  }
+  static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+    return ((AM5Opc >> 8) & 1) ? sub : add;
+  }
+
+  /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+  /// FSTM instructions.
+  static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+                                   unsigned char Offset) {
+    assert((SubMode == ia || SubMode == db) &&
+           "Illegal addressing mode 5 sub-mode!");
+    return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+  }
+  static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+    return (AMSubMode)((AM5Opc >> 9) & 0x7);
+  }
+  static inline bool getAM5WBFlag(unsigned AM5Opc) {
+    return ((AM5Opc >> 8) & 1);
+  }
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 653d5007ab..a6ceb85cb3 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -15,54 +15,49 @@
 #define DEBUG_TYPE "asm-printer"
 #include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
 #include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineDebugInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/Target/TargetAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Mangler.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/MathExtras.h"
 #include <cctype>
+#include <iostream>
+#include <set>
 using namespace llvm;
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 namespace {
-  static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
-    switch (CC) {
-    default: assert(0 && "Unknown condition code");
-    case ARMCC::EQ: return "eq";
-    case ARMCC::NE: return "ne";
-    case ARMCC::CS: return "cs";
-    case ARMCC::CC: return "cc";
-    case ARMCC::MI: return "mi";
-    case ARMCC::PL: return "pl";
-    case ARMCC::VS: return "vs";
-    case ARMCC::VC: return "vc";
-    case ARMCC::HI: return "hi";
-    case ARMCC::LS: return "ls";
-    case ARMCC::GE: return "ge";
-    case ARMCC::LT: return "lt";
-    case ARMCC::GT: return "gt";
-    case ARMCC::LE: return "le";
-    case ARMCC::AL: return "al";
-    }
-  }
-
   struct VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
     ARMAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
-      : AsmPrinter(O, TM, T) {
+      : AsmPrinter(O, TM, T), DW(O, this, T),
        AFI(NULL), InCPMode(false) {
+      Subtarget = &TM.getSubtarget<ARMSubtarget>();
+    }
+
+    DwarfWriter DW;
+
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when printing asm code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    /// AFI - Keep a pointer to the ARMFunctionInfo for the current
+    /// MachineFunction.
+    ARMFunctionInfo *AFI;
+
     /// We name each basic block in a Function with a unique number, so
     /// that we can consistently refer to them later. This is cleared
     /// at the beginning of each call to runOnMachineFunction().
@@ -70,22 +65,79 @@ namespace {
     typedef std::map<const Value *, unsigned> ValueMapTy;
     ValueMapTy NumberForBB;
 
+    /// Keeps the set of GlobalValues that require non-lazy-pointers for
+    /// indirect access.
+    std::set<std::string> GVNonLazyPtrs;
+
+    /// Keeps the set of external function GlobalAddresses that the asm
+    /// printer should generate stubs for.
+    std::set<std::string> FnStubs;
+
+    /// True if the asm printer is currently emitting a series of
+    /// CONSTPOOL_ENTRY instructions.
+    bool InCPMode;
+
     virtual const char *getPassName() const {
       return "ARM Assembly Printer";
     }
 
-    void printAddrMode1(const MachineInstr *MI, int opNum);
-    void printAddrMode2(const MachineInstr *MI, int opNum);
-    void printAddrMode5(const MachineInstr *MI, int opNum);
-    void printOperand(const MachineInstr *MI, int opNum);
-    void printMemOperand(const MachineInstr *MI, int opNum,
-                         const char *Modifier = 0);
+    void printOperand(const MachineInstr *MI, int opNum,
+                      const char *Modifier = 0);
+    void printSOImmOperand(const MachineInstr *MI, int opNum);
+    void printSORegOperand(const MachineInstr *MI, int opNum);
+    void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+    void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+                               const char *Modifier = 0);
+    void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+                                const char *Modifier = 0);
+    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+                                      unsigned Scale);
+    void printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
     void printCCOperand(const MachineInstr *MI, int opNum);
+    void printPCLabel(const MachineInstr *MI, int opNum);
+    void printRegisterList(const MachineInstr *MI, int opNum);
+    void printCPInstOperand(const MachineInstr *MI, int opNum,
+                            const char *Modifier);
+    void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode);
 
     bool printInstruction(const MachineInstr *MI);  // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); bool doInitialization(Module &M); bool doFinalization(Module &M); + + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + printDataDirective(MCPV->getType()); + + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MCPV; + std::string Name = Mang->getValueName(ACPV->getGV()); + if (ACPV->isNonLazyPointer()) { + GVNonLazyPtrs.insert(Name); + O << TAI->getPrivateGlobalPrefix() << Name << "$non_lazy_ptr"; + } else + O << Name; + if (ACPV->getPCAdjustment() != 0) + O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" + << utostr(ACPV->getLabelId()) + << "+" << (unsigned)ACPV->getPCAdjustment() << ")"; + O << "\n"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDebugInfo>(); + } }; } // end of anonymous namespace @@ -97,55 +149,64 @@ namespace { /// regardless of whether the function is in SSA form. /// FunctionPass *llvm::createARMCodePrinterPass(std::ostream &o, - TargetMachine &tm) { + ARMTargetMachine &tm) { return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo()); } -/// runOnMachineFunction - This uses the printMachineInstruction() +/// runOnMachineFunction - This uses the printInstruction() /// method to print assembly for each instruction. /// bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - O << "\n\n"; - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - const std::vector<MachineConstantPoolEntry> - &CP = MF.getConstantPool()->getConstants(); - for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; - if (!CPE.isMachineConstantPoolEntry()){ - Constant *CV = CPE.Val.ConstVal; - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - if (GV->hasExternalWeakLinkage()) { - ExtWeakSymbols.insert(GV); - } - } - } + AFI = MF.getInfo<ARMFunctionInfo>(); + + if (Subtarget->isDarwin()) { + DW.SetDebugInfo(&getAnalysis<MachineDebugInfo>()); } - // Print out jump tables referenced by the function - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + SetupMachineFunction(MF); + O << "\n"; + + // NOTE: we don't print out constant pools here, they are handled as + // instructions. + O << "\n"; // Print out labels for the function. const Function *F = MF.getFunction(); - SwitchToTextSection(getSectionForFunction(*F).c_str(), F); - switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: + SwitchToTextSection("\t.text", F); break; case Function::ExternalLinkage: + SwitchToTextSection("\t.text", F); O << "\t.globl\t" << CurrentFnName << "\n"; break; case Function::WeakLinkage: case Function::LinkOnceLinkage: - O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + if (Subtarget->isDarwin()) { + SwitchToTextSection( + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + } else { + O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + } break; } - EmitAlignment(2, F); + + if (AFI->isThumbFunction()) { + EmitAlignment(1, F); + O << "\t.code\t16\n"; + O << "\t.thumb_func\t" << CurrentFnName << "\n"; + InCPMode = false; + } else + EmitAlignment(2, F); + O << CurrentFnName << ":\n"; + if (Subtarget->isDarwin()) { + // Emit pre-function debug information. + DW.BeginFunction(&MF); + } // Print out code for the function. 
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); @@ -158,127 +219,340 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { // Print the assembly for the instruction. - O << "\t"; - ++EmittedInsts; - printInstruction(II); - } - } - - return false; -} - -void ARMAsmPrinter::printAddrMode1(const MachineInstr *MI, int opNum) { - const MachineOperand &Arg = MI->getOperand(opNum); - const MachineOperand &Shift = MI->getOperand(opNum + 1); - const MachineOperand &ShiftType = MI->getOperand(opNum + 2); - - if(Arg.isImmediate()) { - assert(Shift.getImmedValue() == 0); - printOperand(MI, opNum); - } else { - assert(Arg.isRegister()); - printOperand(MI, opNum); - if(Shift.isRegister() || Shift.getImmedValue() != 0) { - const char *s = NULL; - switch(ShiftType.getImmedValue()) { - case ARMShift::LSL: - s = ", lsl "; - break; - case ARMShift::LSR: - s = ", lsr "; - break; - case ARMShift::ASR: - s = ", asr "; - break; - case ARMShift::ROR: - s = ", ror "; - break; - case ARMShift::RRX: - s = ", rrx "; - break; - } - O << s; - printOperand(MI, opNum + 1); + printMachineInstruction(II); } } -} -void ARMAsmPrinter::printAddrMode2(const MachineInstr *MI, int opNum) { - const MachineOperand &Arg = MI->getOperand(opNum); - const MachineOperand &Offset = MI->getOperand(opNum + 1); - assert(Offset.isImmediate()); + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; - if (Arg.isConstantPoolIndex()) { - assert(Offset.getImmedValue() == 0); - printOperand(MI, opNum); - } else { - assert(Arg.isRegister()); - O << '['; - printOperand(MI, opNum); - O << ", "; - printOperand(MI, opNum + 1); - O << ']'; + if (Subtarget->isDarwin()) { + // Emit post-function debug information. 
+ DW.EndFunction(); } -} - -void ARMAsmPrinter::printAddrMode5(const MachineInstr *MI, int opNum) { - const MachineOperand &Arg = MI->getOperand(opNum); - const MachineOperand &Offset = MI->getOperand(opNum + 1); - assert(Offset.isImmediate()); - if (Arg.isConstantPoolIndex()) { - assert(Offset.getImmedValue() == 0); - printOperand(MI, opNum); - } else { - assert(Arg.isRegister()); - O << '['; - printOperand(MI, opNum); - O << ", "; - printOperand(MI, opNum + 1); - O << ']'; - } + return false; } -void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { - const MachineOperand &MO = MI->getOperand (opNum); - const MRegisterInfo &RI = *TM.getRegisterInfo(); +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: if (MRegisterInfo::isPhysicalRegister(MO.getReg())) - O << LowercaseString (RI.get(MO.getReg()).Name); + O << TM.getRegisterInfo()->get(MO.getReg()).Name; else assert(0 && "not implemented"); break; - case MachineOperand::MO_Immediate: - O << "#" << (int)MO.getImmedValue(); + case MachineOperand::MO_Immediate: { + if (!Modifier || strcmp(Modifier, "no_hash") != 0) + O << "#"; + + O << (int)MO.getImmedValue(); break; + } case MachineOperand::MO_MachineBasicBlock: printBasicBlockLabel(MO.getMachineBasicBlock()); return; case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); GlobalValue *GV = MO.getGlobal(); std::string Name = Mang->getValueName(GV); - O << Name; - if (GV->hasExternalWeakLinkage()) { + bool isExt = (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (isExt && isCallOp && Subtarget->isDarwin() && + TM.getRelocationModel() != Reloc::Static) { + O << TAI->getPrivateGlobalPrefix() << Name << "$stub"; + FnStubs.insert(Name); + } else + O << Name; + + if (GV->hasExternalWeakLinkage()) ExtWeakSymbols.insert(GV); - } - } break; - case MachineOperand::MO_ExternalSymbol: - O << TAI->getGlobalPrefix() << MO.getSymbolName(); + } + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (isCallOp && Subtarget->isDarwin() && + TM.getRelocationModel() != Reloc::Static) { + O << TAI->getPrivateGlobalPrefix() << Name << "$stub"; + FnStubs.insert(Name); + } else + O << Name; break; + } case MachineOperand::MO_ConstantPoolIndex: O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getConstantPoolIndex(); break; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getJumpTableIndex(); + break; default: O << "<unknown operand type>"; abort (); break; } } -void ARMAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - const char *Modifier) { - assert(0 && "not implemented"); +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. +void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImmediate() && (MO.getImmedValue() < (1 << 12)) && + "Not a valid so_imm value!"); + unsigned Imm = ARM_AM::getSOImmValImm(MO.getImmedValue()); + unsigned Rot = ARM_AM::getSOImmValRot(MO.getImmedValue()); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". 
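+  // Illustrative example (not from the original patch): an encoded so_imm
+  // value of 0xC02 has Imm == 2 and Rot == 24, so this prints "#2, 24"
+  // followed by the target's comment string and the materialized value 512.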
+ if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + O << ' ' << TAI->getCommentString() << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(MRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << TM.getRegisterInfo()->get(MO1.getReg()).Name; + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImmedValue())) + << " "; + + if (MO2.getReg()) { + assert(MRegisterInfo::isPhysicalRegister(MO2.getReg())); + O << TM.getRegisterInfo()->get(MO2.getReg()).Name; + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + +void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name; + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).Name; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImmedValue())) + << " #" << ShImm; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.getReg()) { + if (ARM_AM::getAM2Offset(MO2.getImm())) // Don't print +0. 
+    O << "#"
+      << (char)ARM_AM::getAM2Op(MO2.getImm())
+      << ARM_AM::getAM2Offset(MO2.getImm());
+    return;
+  }
+
+  O << (char)ARM_AM::getAM2Op(MO2.getImm())
+    << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+  if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+    O << ", "
+      << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImmedValue()))
+      << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+  if (MO2.getReg()) {
+    O << ", "
+      << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << TM.getRegisterInfo()->get(MO2.getReg()).Name
+      << "]";
+    return;
+  }
+
+  if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+    O << ", #"
+      << (char)ARM_AM::getAM3Op(MO3.getImm())
+      << ImmOffs;
+  O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (MO1.getReg()) {
+    O << (char)ARM_AM::getAM3Op(MO2.getImm())
+      << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+    return;
+  }
+
+  unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+  O << "#"
+    << (char)ARM_AM::getAM3Op(MO2.getImm())
+    << ImmOffs;
+}
+
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+                                          const char *Modifier) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+  if (Modifier && strcmp(Modifier, "submode") == 0) {
+    if (MO1.getReg() == ARM::SP) {
+      bool isLDM = (MI->getOpcode() == ARM::LDM ||
+                    MI->getOpcode() == ARM::LDM_RET);
+      O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+    } else
+      O << ARM_AM::getAMSubModeStr(Mode);
+  } else {
+    printOperand(MI, Op);
+    if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+      O << "!";
+  }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+                                          const char *Modifier) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+  if (!MO1.isRegister()) {   // FIXME: This is for CP entries, but isn't right.
+    printOperand(MI, Op);
+    return;
+  }
+
+  assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+  if (Modifier && strcmp(Modifier, "submode") == 0) {
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+    if (MO1.getReg() == ARM::SP) {
+      bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+                     MI->getOpcode() == ARM::FLDMS);
+      O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+    } else
+      O << ARM_AM::getAMSubModeStr(Mode);
+    return;
+  } else if (Modifier && strcmp(Modifier, "base") == 0) {
+    // Used for FLDM{D|S} and FSTM{D|S} operations.
+ O << TM.getRegisterInfo()->get(MO1.getReg()).Name; + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name; + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, + const char *Modifier) { + if (Modifier && strcmp(Modifier, "label") == 0) { + printPCLabel(MI, Op+1); + return; + } + + const MachineOperand &MO1 = MI->getOperand(Op); + assert(MRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).Name << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name; + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).Name << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, + unsigned Scale) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name; + if (unsigned ImmOffs = MO2.getImm()) { + O << ", #" << ImmOffs; + if (Scale > 1) + O << " * " << Scale; + } + O << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 1); +} +void +ARMAsmPrinter::printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 2); +} +void +ARMAsmPrinter::printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name; + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs << " * 4"; + O << "]"; } void ARMAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { @@ -286,9 +560,140 @@ void ARMAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { O << ARMCondCodeToString((ARMCC::CondCodes)CC); } +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { + int Id = (int)MI->getOperand(opNum).getImmedValue(); + O << TAI->getPrivateGlobalPrefix() << "PC" << Id; +} + +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { + O << "{"; + for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + printOperand(MI, i); + if (i != e-1) O << ", "; + } + O << "}"; +} + +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, + const char *Modifier) { + assert(Modifier && "This operand only works with a modifier!"); + // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the + // data itself. 
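+  // Illustrative example (not from the original patch): for function #1 and
+  // entry ID 0, the "label" modifier emits "LCPI1_0:" (given a private global
+  // prefix of "L"), and the "cpentry" modifier then emits the entry's data.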
+ if (!strcmp(Modifier, "label")) { + unsigned ID = MI->getOperand(OpNo).getImm(); + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << ID << ":\n"; + } else { + assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); + unsigned CPI = MI->getOperand(OpNo).getConstantPoolIndex(); + + const MachineConstantPoolEntry &MCPE = // Chasing pointers is fun? + MI->getParent()->getParent()->getConstantPool()->getConstants()[CPI]; + + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + } +} + +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id + unsigned JTI = MO1.getJumpTableIndex(); + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImmedValue() << ":\n"; + + const char *JTEntryDirective = TAI->getJumpTableDirective(); + if (!JTEntryDirective) + JTEntryDirective = TAI->getData32bitsDirective(); + + const MachineFunction *MF = MI->getParent()->getParent(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + std::set<MachineBasicBlock*> JTSets; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (UseSet && JTSets.insert(MBB).second) + printSetLabel(JTI, MO2.getImmedValue(), MBB); + + O << JTEntryDirective << ' '; + if (UseSet) + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImmedValue() + << "_set_" << MBB->getNumber(); + else if (TM.getRelocationModel() == Reloc::PIC_) { + printBasicBlockLabel(MBB, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to JT + if (!TAI->getJumpTableDirective()) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << JTI << '_' << MO2.getImmedValue(); + } else + printBasicBlockLabel(MBB, false, false); + O << '\n'; + } +} + + +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'Q': + if (TM.getTargetData()->isLittleEndian()) + break; + // Fallthrough + case 'R': + if (TM.getTargetData()->isBigEndian()) + break; + // Fallthrough + case 'H': // Write second word of DI / DF reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isRegister() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isRegister()) + return true; + ++OpNo; // Return the high-part. + } + } + + printOperand(MI, OpNo); + return false; +} + +void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) { + if (!InCPMode && AFI->isThumbFunction()) { + EmitAlignment(2); + InCPMode = true; + } + } else { + if (InCPMode && AFI->isThumbFunction()) { + EmitAlignment(1); + InCPMode = false; + } + O << "\t"; + } + + // Call the autogenerated instruction printer routines. 
+ printInstruction(MI); +} + bool ARMAsmPrinter::doInitialization(Module &M) { - AsmPrinter::doInitialization(M); - return false; // success + if (Subtarget->isDarwin()) { + // Emit initial debug information. + DW.BeginModule(&M); + } + + return AsmPrinter::doInitialization(M); } bool ARMAsmPrinter::doFinalization(Module &M) { @@ -302,53 +707,154 @@ bool ARMAsmPrinter::doFinalization(Module &M) { if (EmitSpecialLLVMGlobal(I)) continue; - O << "\n\n"; std::string name = Mang->getValueName(I); Constant *C = I->getInitializer(); unsigned Size = TD->getTypeSize(C->getType()); - unsigned Align = Log2_32(TD->getTypeAlignment(C->getType())); + unsigned Align = TD->getPreferredAlignmentLog(I); if (C->isNullValue() && !I->hasSection() && - (I->hasLinkOnceLinkage() || I->hasInternalLinkage() || - I->hasWeakLinkage())) { - SwitchToDataSection(".data", I); - if (I->hasInternalLinkage()) - O << "\t.local " << name << "\n"; - - O << "\t.comm " << name << "," << Size - << "," << (unsigned) (1 << Align); - O << "\n"; + (I->hasInternalLinkage() || I->hasWeakLinkage() || + I->hasLinkOnceLinkage() || + (Subtarget->isDarwin() && I->hasExternalLinkage()))) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + if (I->hasExternalLinkage()) { + O << "\t.globl\t" << name << "\n"; + O << "\t.zerofill __DATA__, __common, " << name << ", " + << Size << ", " << Align; + } else { + SwitchToDataSection(TAI->getDataSection(), I); + if (TAI->getLCOMMDirective() != NULL) { + if (I->hasInternalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size; + if (Subtarget->isDarwin()) + O << "," << Align; + } else + O << TAI->getCOMMDirective() << name << "," << Size; + } else { + if (I->hasInternalLinkage()) + O << "\t.local\t" << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } + O << "\t\t" << TAI->getCommentString() << " " << I->getName() << "\n"; + continue; } else { switch (I->getLinkage()) { default: assert(0 && "Unknown linkage type!"); break; + case GlobalValue::LinkOnceLinkage: + case GlobalValue::WeakLinkage: + if (Subtarget->isDarwin()) { + O << "\t.globl " << name << "\n" + << "\t.weak_definition " << name << "\n"; + SwitchToDataSection("\t.section __DATA,__const_coal,coalesced", I); + } else { + O << "\t.section\t.llvm.linkonce.d." 
<< name << ",\"aw\",@progbits\n" + << "\t.weak " << name << "\n"; + } + break; case GlobalValue::ExternalLinkage: O << "\t.globl " << name << "\n"; - break; + // FALL THROUGH case GlobalValue::InternalLinkage: - break; - } - - if (I->hasSection() && - (I->getSection() == ".ctors" || - I->getSection() == ".dtors")) { - std::string SectionName = ".section " + I->getSection(); + if (I->isConstant()) { + const ConstantArray *CVA = dyn_cast<ConstantArray>(C); + if (TAI->getCStringSection() && CVA && CVA->isCString()) { + SwitchToDataSection(TAI->getCStringSection(), I); + break; + } + } - SectionName += ",\"aw\",%progbits"; + if (I->hasSection() && + (I->getSection() == ".ctors" || + I->getSection() == ".dtors")) { + assert(!Subtarget->isDarwin()); + std::string SectionName = ".section " + I->getSection(); + SectionName += ",\"aw\",@progbits"; + SwitchToDataSection(SectionName.c_str()); + } else { + SwitchToDataSection(TAI->getDataSection(), I); + } - SwitchToDataSection(SectionName.c_str()); - } else { - SwitchToDataSection(TAI->getDataSection(), I); + break; } + } - EmitAlignment(Align, I); + EmitAlignment(Align, I); + if (TAI->hasDotTypeDotSizeDirective()) { O << "\t.type " << name << ", %object\n"; O << "\t.size " << name << ", " << Size << "\n"; - O << name << ":\n"; - EmitGlobalConstant(C); } + O << name << ":\n"; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; + } + + if (Subtarget->isDarwin()) { + // Output stubs for dynamically-linked functions + unsigned j = 1; + for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i, ++j) { + if (TM.getRelocationModel() == Reloc::PIC_) + SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs," + "none,16", 0); + else + SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs," + "none,12", 0); + + EmitAlignment(2); + O << "\t.code\t32\n"; + + O << "L" << *i << "$stub:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\tldr ip, L" << *i << "$slp\n"; + if (TM.getRelocationModel() == Reloc::PIC_) { + O << "L" << *i << "$scv:\n"; + O << "\tadd ip, pc, ip\n"; + } + O << "\tldr pc, [ip, #0]\n"; + O << "L" << *i << "$slp:\n"; + if (TM.getRelocationModel() == Reloc::PIC_) + O << "\t.long\tL" << *i << "$lazy_ptr-(L" << *i << "$scv+8)\n"; + else + O << "\t.long\tL" << *i << "$lazy_ptr\n"; + SwitchToDataSection(".lazy_symbol_pointer", 0); + O << "L" << *i << "$lazy_ptr:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\t.long\tdyld_stub_binding_helper\n"; + } + O << "\n"; + + // Output non-lazy-pointers for external and common global variables. + if (GVNonLazyPtrs.begin() != GVNonLazyPtrs.end()) + SwitchToDataSection(".non_lazy_symbol_pointer", 0); + for (std::set<std::string>::iterator i = GVNonLazyPtrs.begin(), + e = GVNonLazyPtrs.end(); i != e; ++i) { + O << "L" << *i << "$non_lazy_ptr:\n"; + O << "\t.indirect_symbol " << *i << "\n"; + O << "\t.long\t0\n"; + } + + // Emit initial debug information. + DW.EndModule(); + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. 
+ O << "\t.subsections_via_symbols\n"; } AsmPrinter::doFinalization(M); diff --git a/lib/Target/ARM/ARMCommon.cpp b/lib/Target/ARM/ARMCommon.cpp deleted file mode 100644 index fd3757303b..0000000000 --- a/lib/Target/ARM/ARMCommon.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//===-- ARMCommon.cpp - Define support functions for ARM --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by the "Instituto Nokia de Tecnologia" and -// is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// -#include "ARMCommon.h" - -static inline unsigned rotateL(unsigned x, unsigned n){ - return ((x << n) | (x >> (32 - n))); -} - -static inline unsigned rotateR(unsigned x, unsigned n){ - return ((x >> n) | (x << (32 - n))); -} - -// finds the end position of largest sequence of zeros in binary representation -// of 'immediate'. -static int findLargestZeroSequence(unsigned immediate){ - int max_zero_pos = 0; - int max_zero_length = 0; - int zero_pos; - int zero_length; - int pos = 0; - int end_pos; - - while ((immediate & 0x3) == 0) { - immediate = rotateR(immediate, 2); - pos+=2; - } - end_pos = pos+32; - - while (pos<end_pos){ - while (((immediate & 0x3) != 0)&&(pos<end_pos)) { - immediate = rotateR(immediate, 2); - pos+=2; - } - zero_pos = pos; - while (((immediate & 0x3) == 0)&&(pos<end_pos)) { - immediate = rotateR(immediate, 2); - pos+=2; - } - zero_length = pos - zero_pos; - if (zero_length > max_zero_length){ - max_zero_length = zero_length; - max_zero_pos = zero_pos % 32; - } - - } - - return (max_zero_pos + max_zero_length) % 32; -} - -std::vector<unsigned> splitImmediate(unsigned immediate){ - std::vector<unsigned> immediatePieces; - - if (immediate == 0){ - immediatePieces.push_back(0); - } else { - int start_pos = findLargestZeroSequence(immediate); - unsigned immediate_tmp = rotateR(immediate, start_pos); - int pos = 0; - while (pos < 32){ - while(((immediate_tmp&0x3) == 0)&&(pos<32)){ - immediate_tmp = rotateR(immediate_tmp,2); - pos+=2; - } - if (pos < 32){ - immediatePieces.push_back(rotateL(immediate_tmp&0xFF, - (start_pos + pos) % 32 )); - immediate_tmp = rotateR(immediate_tmp,8); - pos+=8; - } - } - } - return immediatePieces; -} diff --git a/lib/Target/ARM/ARMCommon.h b/lib/Target/ARM/ARMCommon.h deleted file mode 100644 index c35150b7a4..0000000000 --- a/lib/Target/ARM/ARMCommon.h +++ /dev/null @@ -1,22 +0,0 @@ -//===-- ARMCommon.h - Define support functions for ARM ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by the "Instituto Nokia de Tecnologia" and -// is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_COMMON_H
-#define ARM_COMMON_H
-
-#include <vector>
-
-std::vector<unsigned> splitImmediate(unsigned immediate);
-
-#endif
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000000..183bde8824
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,490 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <iostream>
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+
+namespace {
+  /// ARMConstantIslands - Due to limited pc-relative displacements, ARM
+  /// requires constant pool entries to be scattered among the instructions
+  /// inside a function. To do this, it completely ignores the normal LLVM
+  /// constant pool; instead, it places constants wherever it feels like, with
+  /// special instructions.
+  ///
+  /// The terminology used in this pass includes:
+  ///   Islands - Clumps of constants placed in the function.
+  ///   Water   - Potential places where an island could be formed.
+  ///   CPE     - A constant pool entry that has been placed somewhere, which
+  ///             tracks a list of users.
+  class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+    /// NextUID - Assign unique ID's to CPE's.
+    unsigned NextUID;
+
+    /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+    /// by MBB Number.
+    std::vector<unsigned> BBSizes;
+
+    /// WaterList - A sorted list of basic blocks where islands could be placed
+    /// (i.e. blocks that don't fall through to the following block, due
+    /// to a return, unreachable, or unconditional branch).
+    std::vector<MachineBasicBlock*> WaterList;
+
+    /// CPUser - One user of a constant pool, keeping the machine instruction
+    /// pointer, the constant pool being referenced, and the max displacement
+    /// allowed from the instruction to the CP.
+    struct CPUser {
+      MachineInstr *MI;
+      MachineInstr *CPEMI;
+      unsigned MaxDisp;
+      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+    };
+
+    /// CPUsers - Keep track of all of the machine instructions that use various
+    /// constant pools and their max displacement.
+    std::vector<CPUser> CPUsers;
+
+    const TargetInstrInfo *TII;
+    const TargetAsmInfo   *TAI;
+  public:
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual const char *getPassName() const {
+      return "ARM constant island placement pass";
+    }
+
+  private:
+    void DoInitialPlacement(MachineFunction &Fn,
+                            std::vector<MachineInstr*> &CPEMIs);
+    void InitialFunctionScan(MachineFunction &Fn,
+                             const std::vector<MachineInstr*> &CPEMIs);
+    void SplitBlockBeforeInstr(MachineInstr *MI);
+    bool HandleConstantPoolUser(MachineFunction &Fn, CPUser &U);
+    void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+
+    unsigned GetInstSize(MachineInstr *MI) const;
+    unsigned GetOffsetOf(MachineInstr *MI) const;
+  };
+}
+
+/// createARMConstantIslandPass - returns an instance of the constant island
+/// placement pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+  return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+  // If there are no constants, there is nothing to do.
+  MachineConstantPool &MCP = *Fn.getConstantPool();
+  if (MCP.isEmpty()) return false;
+
+  TII = Fn.getTarget().getInstrInfo();
+  TAI = Fn.getTarget().getTargetAsmInfo();
+
+  // Renumber all of the machine basic blocks in the function, guaranteeing that
+  // the numbers agree with the position of the block in the function.
+  Fn.RenumberBlocks();
+
+  // Perform the initial placement of the constant pool entries.  To start with,
+  // we put them all at the end of the function.
+  std::vector<MachineInstr*> CPEMIs;
+  DoInitialPlacement(Fn, CPEMIs);
+
+  /// The next UID to take is the first unused one.
+  NextUID = CPEMIs.size();
+
+  // Do the initial scan of the function, building up information about the
+  // sizes of each block, the location of all the water, and finding all of the
+  // constant pool users.
+  InitialFunctionScan(Fn, CPEMIs);
+  CPEMIs.clear();
+
+  // Iteratively place constant pool entries until there is no change.
+  bool MadeChange;
+  do {
+    MadeChange = false;
+    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+      MadeChange |= HandleConstantPoolUser(Fn, CPUsers[i]);
+  } while (MadeChange);
+
+  BBSizes.clear();
+  WaterList.clear();
+  CPUsers.clear();
+
+  return true;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries.  To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+                                            std::vector<MachineInstr*> &CPEMIs){
+  // Create the basic block to hold the CPE's.
+  MachineBasicBlock *BB = new MachineBasicBlock();
+  Fn.getBasicBlockList().push_back(BB);
+
+  // Add all of the constants from the constant pool to the end block, using an
+  // identity mapping of CPI's to CPE's.
+  const std::vector<MachineConstantPoolEntry> &CPs =
+    Fn.getConstantPool()->getConstants();
+
+  const TargetData &TD = *Fn.getTarget().getTargetData();
+  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+    unsigned Size = TD.getTypeSize(CPs[i].getType());
+    // Verify that all constant pool entries are a multiple of 4 bytes.  If not,
+    // we would have to pad them out or something so that instructions stay
+    // aligned.
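The loop in runOnMachineFunction() above is a fixpoint iteration: placing one entry can split a block and shift offsets, which may push another user out of range, so every user is revisited until a whole pass makes no change. A hedged sketch of that shape; Users and fixIfOutOfRange are placeholders for the pass's CPUsers vector and HandleConstantPoolUser:

template <typename UserList, typename FixFn>
static bool runToFixpoint(UserList &Users, FixFn fixIfOutOfRange) {
  bool Any = false, Changed;
  do {
    Changed = false;
    for (unsigned i = 0, e = Users.size(); i != e; ++i)
      Changed |= fixIfOutOfRange(Users[i]); // may split blocks, moving offsets
    Any |= Changed;
  } while (Changed);
  return Any;
}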
+    assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+    MachineInstr *CPEMI =
+      BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
+               .addImm(i).addConstantPoolIndex(i).addImm(Size);
+    CPEMIs.push_back(CPEMI);
+    DEBUG(std::cerr << "Moved CPI#" << i << " to end of function as #"
+                    << i << "\n");
+  }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall through
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+  // Get the next machine basic block in the function.
+  MachineFunction::iterator MBBI = MBB;
+  if (next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
+    return false;
+
+  MachineBasicBlock *NextBB = next(MBBI);
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I == NextBB)
+      return true;
+
+  return false;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+                                 const std::vector<MachineInstr*> &CPEMIs) {
+  for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock &MBB = *MBBI;
+
+    // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+    if (!BBHasFallthrough(&MBB))
+      WaterList.push_back(&MBB);
+
+    unsigned MBBSize = 0;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      // Add instruction size to MBBSize.
+      MBBSize += GetInstSize(I);
+
+      // Scan the instructions for constant pool operands.
+      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+        if (I->getOperand(op).isConstantPoolIndex()) {
+          // We found one.  The addressing mode tells us the max displacement
+          // from the PC that this instruction permits.
+          unsigned MaxOffs = 0;
+
+          // Basic size info comes from the TSFlags field.
+          unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
+          switch (TSFlags & ARMII::AddrModeMask) {
+          default:
+            // Constant pool entries can reach anything.
+            if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+              continue;
+            assert(0 && "Unknown addressing mode for CP reference!");
+          case ARMII::AddrMode1: // AM1: 8 bits << 2
+            MaxOffs = 1 << (8+2);   // Taking the address of a CP entry.
+            break;
+          case ARMII::AddrMode2:
+            MaxOffs = 1 << 12;   // +-offset_12
+            break;
+          case ARMII::AddrMode3:
+            MaxOffs = 1 << 8;   // +-offset_8
+            break;
+            // addrmode4 has no immediate offset.
+          case ARMII::AddrMode5:
+            MaxOffs = 1 << (8+2);   // +-(offset_8*4)
+            break;
+          case ARMII::AddrModeT1:
+            MaxOffs = 1 << 5;
+            break;
+          case ARMII::AddrModeT2:
+            MaxOffs = 1 << (5+1);
+            break;
+          case ARMII::AddrModeT4:
+            MaxOffs = 1 << (5+2);
+            break;
+          }
+
+          // Remember that this is a user of a CP entry.
+          MachineInstr *CPEMI =CPEMIs[I->getOperand(op).getConstantPoolIndex()];
+          CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+          // Instructions can only use one CP entry; don't bother scanning the
+          // rest of the operands.
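For quick reference, the maximum displacements chosen in the switch above work out to the following byte reaches (a recap; the pipeline bias on PC reads is glossed over here):

// AddrMode2   : 1 << 12    = 4096 bytes (ldr/str with imm12)
// AddrMode3   : 1 << 8     =  256 bytes (halfword/signed-byte loads, imm8)
// AddrMode1/5 : 1 << (8+2) = 1024 bytes (imm8, implicitly scaled by 4)
// AddrModeT1  : 1 << 5 = 32, T2: 1 << 6 = 64, T4: 1 << 7 = 128 (scaled imm5)
// So an AddrMode3 user can only reach a CPE within 256 bytes of itself,
// which is what forces islands to be scattered through large functions.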
+          break;
+        }
+    }
+    BBSizes.push_back(MBBSize);
+  }
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) {
+  return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMConstantIslands::GetInstSize(MachineInstr *MI) const {
+  // Basic size info comes from the TSFlags field.
+  unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+
+  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+  default:
+    // If this machine instr is an inline asm, measure it.
+    if (MI->getOpcode() == ARM::INLINEASM)
+      return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+    assert(0 && "Unknown or unset size field for instr!");
+    break;
+  case ARMII::Size8Bytes: return 8;          // Arm instruction x 2.
+  case ARMII::Size4Bytes: return 4;          // Arm instruction.
+  case ARMII::Size2Bytes: return 2;          // Thumb instruction.
+  case ARMII::SizeSpecial: {
+    switch (MI->getOpcode()) {
+    case ARM::CONSTPOOL_ENTRY:
+      // If this machine instr is a constant pool entry, its size is recorded as
+      // operand #2.
+      return MI->getOperand(2).getImm();
+    case ARM::BR_JTr:
+    case ARM::BR_JTm:
+    case ARM::BR_JTadd: {
+      // These are jumptable branches, i.e. a branch followed by an inlined
+      // jumptable.  The size is 4 + 4 * number of entries.
+      unsigned JTI = MI->getOperand(MI->getNumOperands()-2).getJumpTableIndex();
+      const MachineFunction *MF = MI->getParent()->getParent();
+      MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+      assert(JTI < JT.size());
+      return getNumJTEntries(JT, JTI) * 4 + 4;
+    }
+    default:
+      // Otherwise, pseudo-instruction sizes are zero.
+      return 0;
+    }
+  }
+  }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function.  This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the block
+  // it is in.
+  unsigned Offset = 0;
+
+  // Sum block sizes before MBB.
+  for (unsigned BB = 0, e = MBB->getNumber(); BB != e; ++BB)
+    Offset += BBSizes[BB];
+
+  // Sum instructions before MI in MBB.
+  for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    if (&*I == MI) return Offset;
+    Offset += GetInstSize(I);
+  }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+                              const MachineBasicBlock *RHS) {
+  return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers.  Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+  // Renumber the MBB's to keep them consecutive.
+  NewBB->getParent()->RenumberBlocks(NewBB);
+
+  // Insert a size into BBSizes to align it properly with the (newly
+  // renumbered) block numbers.
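A worked example of GetOffsetOf() with hypothetical numbers: block sizes and instruction sizes are simply prefix-summed.

#include <vector>

static unsigned exampleOffset() {
  std::vector<unsigned> BBSizes = {12, 8, 20}; // hypothetical block sizes
  unsigned Offset = 0;
  for (unsigned BB = 0; BB != 2; ++BB) // sum blocks before MI's block (#2)
    Offset += BBSizes[BB];             // 12 + 8
  return Offset + 2 * 4;               // two 4-byte instrs before MI -> 28
}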
+  BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList.  Specifically, we need to add NewBB as having
+  // available water after it.
+  std::vector<MachineBasicBlock*>::iterator IP =
+    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+                     CompareMBBNumbers);
+  WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch.  Update data structures and renumber blocks to
+/// account for this change.
+void ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+  MachineBasicBlock *OrigBB = MI->getParent();
+
+  // Create a new MBB for the code after the OrigBB.
+  MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
+  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+  OrigBB->getParent()->getBasicBlockList().insert(MBBI, NewBB);
+
+  // Splice the instructions starting with MI over to NewBB.
+  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+  // Add an unconditional branch from OrigBB to NewBB.
+  BuildMI(OrigBB, TII->get(ARM::B)).addMBB(NewBB);
+  NumSplit++;
+
+  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
+  while (!OrigBB->succ_empty()) {
+    MachineBasicBlock *Succ = *OrigBB->succ_begin();
+    OrigBB->removeSuccessor(Succ);
+    NewBB->addSuccessor(Succ);
+
+    // This pass should be run after register allocation, so there should be no
+    // PHI nodes to update.
+    assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+           && "PHI nodes should be eliminated by now!");
+  }
+
+  // OrigBB branches to NewBB.
+  OrigBB->addSuccessor(NewBB);
+
+  // Update internal data structures to account for the newly inserted MBB.
+  UpdateForInsertedWaterBlock(NewBB);
+
+  // Figure out how large NewBB is.
+  unsigned NewBBSize = 0;
+  for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+       I != E; ++I)
+    NewBBSize += GetInstSize(I);
+
+  // Set the size of NewBB in BBSizes.
+  BBSizes[NewBB->getNumber()] = NewBBSize;
+
+  // We removed instructions from OrigBB; subtract that off from its size.
+  // Add 4 to the block to count the unconditional branch we added to it.
+  BBSizes[OrigBB->getNumber()] -= NewBBSize-4;
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range.  If so, pick up the constant pool value and move it to some
+/// place in-range.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, CPUser &U){
+  MachineInstr *UserMI = U.MI;
+  MachineInstr *CPEMI  = U.CPEMI;
+
+  unsigned UserOffset = GetOffsetOf(UserMI);
+  unsigned CPEOffset  = GetOffsetOf(CPEMI);
+
+  DEBUG(std::cerr << "User of CPE#" << CPEMI->getOperand(0).getImm()
+                  << " max delta=" << U.MaxDisp
+                  << " at offset " << int(UserOffset-CPEOffset) << "\t"
+                  << *UserMI);
+
+  // Check to see if the CPE is already in-range.
+  if (UserOffset < CPEOffset) {
+    // User before the CPE.
+    if (CPEOffset-UserOffset <= U.MaxDisp)
+      return false;
+  } else {
+    if (UserOffset-CPEOffset <= U.MaxDisp)
+      return false;
+  }
+
+
+  // Solution guaranteed to work: split the user's MBB right before the user and
+  // insert a clone of the CPE into the newly created water.
+
+  // If the user isn't at the start of its MBB, or if there is a fall-through
+  // into the user's MBB, split the MBB before the User.
+  MachineBasicBlock *UserMBB = UserMI->getParent();
+  if (&UserMBB->front() != UserMI ||
+      UserMBB == &Fn.front() || // entry MBB of function.
+ BBHasFallthrough(prior(MachineFunction::iterator(UserMBB)))) { + // TODO: Search for the best place to split the code. In practice, using + // loop nesting information to insert these guys outside of loops would be + // sufficient. + SplitBlockBeforeInstr(UserMI); + + // UserMI's BB may have changed. + UserMBB = UserMI->getParent(); + } + + // Okay, we know we can put an island before UserMBB now, do it! + MachineBasicBlock *NewIsland = new MachineBasicBlock(); + Fn.getBasicBlockList().insert(UserMBB, NewIsland); + + // Update internal data structures to account for the newly inserted MBB. + UpdateForInsertedWaterBlock(NewIsland); + + // Now that we have an island to add the CPE to, clone the original CPE and + // add it to the island. + unsigned ID = NextUID++; + unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex(); + unsigned Size = CPEMI->getOperand(2).getImm(); + + // Build a new CPE for this user. + U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY)) + .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + + // Increase the size of the island block to account for the new entry. + BBSizes[NewIsland->getNumber()] += Size; + + // Finally, change the CPI in the instruction operand to be ID. + for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) + if (UserMI->getOperand(i).isConstantPoolIndex()) { + UserMI->getOperand(i).setConstantPoolIndex(ID); + break; + } + + DEBUG(std::cerr << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" + << *UserMI); + + + return true; +} + diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp new file mode 100644 index 0000000000..97cca07d33 --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -0,0 +1,55 @@ +//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Evan Cheng and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. 
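The in-range test at the top of HandleConstantPoolUser() is just an unsigned distance check done one direction at a time; a minimal sketch (the helper name is not from the patch):

static bool cpeInRange(unsigned UserOffset, unsigned CPEOffset,
                       unsigned MaxDisp) {
  // Offsets are unsigned, so compute the distance without wrapping.
  return UserOffset < CPEOffset ? CPEOffset - UserOffset <= MaxDisp
                                : UserOffset - CPEOffset <= MaxDisp;
}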
+// +//===----------------------------------------------------------------------===// + +#include "ARMConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/GlobalValue.h" +using namespace llvm; + +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, + bool isNonLazy, unsigned char PCAdj) + : MachineConstantPoolValue((const Type*)gv->getType()), + GV(gv), LabelId(id), isNonLazyPtr(isNonLazy), PCAdjust(PCAdj) {} + +int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = (1 << Alignment)-1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].Offset & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (CPV->GV == GV && CPV->LabelId == LabelId && + CPV->isNonLazyPtr == isNonLazyPtr) + return i; + } + } + + return -1; +} + +void +ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddInteger(LabelId); + ID.AddInteger((unsigned)isNonLazyPtr); + ID.AddInteger(PCAdjust); +} + +void ARMConstantPoolValue::print(std::ostream &O) const { + O << GV->getName(); + if (isNonLazyPtr) O << "$non_lazy_ptr"; + if (PCAdjust != 0) O << "-(LPIC" << LabelId << "+" + << (unsigned)PCAdjust << ")"; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h new file mode 100644 index 0000000000..a9143d4ddc --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -0,0 +1,50 @@ +//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Evan Cheng and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" + +namespace llvm { + +/// ARMConstantPoolValue - ARM specific constantpool value. This is used to +/// represent PC relative displacement between the address of the load +/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)). +class ARMConstantPoolValue : public MachineConstantPoolValue { + GlobalValue *GV; // GlobalValue being loaded. + unsigned LabelId; // Label id of the load. + bool isNonLazyPtr; // True if loading a Mac OS X non_lazy_ptr stub. + unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative. + // 8 for ARM, 4 for Thumb. 
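The 8-versus-4 PCAdjust values reflect how the PC reads on each instruction set: in ARM mode the PC is the current instruction's address plus 8 (two 4-byte instructions ahead), in Thumb mode plus 4. A hedged recap of the arithmetic the entry encodes:

// The pool slot stores       &GV - (&LPICn + PCAdjust)
// and the code adds back     pc read at LPICn, i.e. &LPICn + PCAdjust,
// so the resulting sum is    &GV, independent of where the code is loaded.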
+ +public: + ARMConstantPoolValue(GlobalValue *gv, unsigned id, bool isNonLazy = false, + unsigned char PCAdj = 0); + + GlobalValue *getGV() const { return GV; } + unsigned getLabelId() const { return LabelId; } + bool isNonLazyPointer() const { return isNonLazyPtr; } + unsigned char getPCAdjustment() const { return PCAdjust; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + + virtual void print(std::ostream &O) const; +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h index 5bd7f67667..c56640a354 100644 --- a/lib/Target/ARM/ARMFrameInfo.h +++ b/lib/Target/ARM/ARMFrameInfo.h @@ -17,17 +17,15 @@ #include "ARM.h" #include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "ARMSubtarget.h" namespace llvm { -class ARMFrameInfo: public TargetFrameInfo { - +class ARMFrameInfo : public TargetFrameInfo { public: - ARMFrameInfo() - : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { + ARMFrameInfo(const ARMSubtarget &ST) + : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) { } - }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 2a5f3e360c..f5f4599b5c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMISelLowering.h" #include "ARMTargetMachine.h" -#include "ARMCommon.h" +#include "ARMAddressingModes.h" #include "llvm/CallingConv.h" +#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/Constants.h" #include "llvm/Intrinsics.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,1081 +28,545 @@ #include "llvm/CodeGen/SSARegMap.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include <vector> +#include <iostream> using namespace llvm; +//===--------------------------------------------------------------------===// +/// ARMDAGToDAGISel - ARM specific code to select ARM machine +/// instructions for SelectionDAG operations. +/// namespace { - class ARMTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. 
- public: - ARMTargetLowering(TargetMachine &TM); - virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG); - virtual const char *getTargetNodeName(unsigned Opcode) const; - std::vector<unsigned> - getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT::ValueType VT) const; - }; - -} - -ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM) { - addRegisterClass(MVT::i32, ARM::IntRegsRegisterClass); - addRegisterClass(MVT::f32, ARM::FPRegsRegisterClass); - addRegisterClass(MVT::f64, ARM::DFPRegsRegisterClass); - - setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - - setOperationAction(ISD::RET, MVT::Other, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); - - setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::f32, Expand); - setOperationAction(ISD::SETCC, MVT::f64, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); - setOperationAction(ISD::MEMSET, MVT::Other, Expand); - setOperationAction(ISD::MEMCPY, MVT::Other, Expand); - - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - - setOperationAction(ISD::ConstantFP, MVT::f64, Custom); - setOperationAction(ISD::ConstantFP, MVT::f32, Custom); - - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - - setStackPointerRegisterToSaveRestore(ARM::R13); - - setSchedulingPreference(SchedulingForRegPressure); - computeRegisterProperties(); -} - -namespace llvm { - namespace ARMISD { - enum NodeType { - // Start the numbering where the builting ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END, - /// CALL - A direct function call. - CALL, - - /// Return with a flag operand. 
- RET_FLAG, - - CMP, - - SELECT, - - BR, - - FSITOS, - FTOSIS, - - FSITOD, - FTOSID, - - FUITOS, - FTOUIS, - - FUITOD, - FTOUID, - - FMRRD, +class ARMDAGToDAGISel : public SelectionDAGISel { + ARMTargetLowering Lowering; - FMDRR, + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; - FMSTAT - }; +public: + ARMDAGToDAGISel(ARMTargetMachine &TM) + : SelectionDAGISel(Lowering), Lowering(TM), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } -} -/// DAGFPCCToARMCC - Convert a DAG fp condition code to an ARM CC -// Unordered = !N & !Z & C & V = V -// Ordered = N | Z | !C | !V = N | Z | !V -static std::vector<unsigned> DAGFPCCToARMCC(ISD::CondCode CC) { - switch (CC) { - default: - assert(0 && "Unknown fp condition code!"); -// SETOEQ = (N | Z | !V) & Z = Z = EQ - case ISD::SETEQ: - case ISD::SETOEQ: return make_vector<unsigned>(ARMCC::EQ, 0); -// SETOGT = (N | Z | !V) & !N & !Z = !V &!N &!Z = (N = V) & !Z = GT - case ISD::SETGT: - case ISD::SETOGT: return make_vector<unsigned>(ARMCC::GT, 0); -// SETOGE = (N | Z | !V) & !N = (Z | !V) & !N = !V & !N = GE - case ISD::SETGE: - case ISD::SETOGE: return make_vector<unsigned>(ARMCC::GE, 0); -// SETOLT = (N | Z | !V) & N = N = MI - case ISD::SETLT: - case ISD::SETOLT: return make_vector<unsigned>(ARMCC::MI, 0); -// SETOLE = (N | Z | !V) & (N | Z) = N | Z = !C | Z = LS - case ISD::SETLE: - case ISD::SETOLE: return make_vector<unsigned>(ARMCC::LS, 0); -// SETONE = OGT | OLT - case ISD::SETONE: return make_vector<unsigned>(ARMCC::GT, ARMCC::MI, 0); -// SETO = N | Z | !V = Z | !V = !V = VC - case ISD::SETO: return make_vector<unsigned>(ARMCC::VC, 0); -// SETUO = V = VS - case ISD::SETUO: return make_vector<unsigned>(ARMCC::VS, 0); -// SETUEQ = V | Z (need two instructions) = EQ/VS - case ISD::SETUEQ: return make_vector<unsigned>(ARMCC::EQ, ARMCC::VS, 0); -// SETUGT = V | (!Z & !N) = !Z & !N = !Z & C = HI - case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0); -// SETUGE = V | !N = !N = PL - case ISD::SETUGE: return make_vector<unsigned>(ARMCC::PL, 0); -// SETULT = V | N = LT - case ISD::SETULT: return make_vector<unsigned>(ARMCC::LT, 0); -// SETULE = V | Z | N = LE - case ISD::SETULE: return make_vector<unsigned>(ARMCC::LE, 0); -// SETUNE = V | !Z = !Z = NE - case ISD::SETNE: - case ISD::SETUNE: return make_vector<unsigned>(ARMCC::NE, 0); - } -} + virtual const char *getPassName() const { + return "ARM Instruction Selection"; + } + + SDNode *Select(SDOperand Op); + virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); + bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset, SDOperand &Opc); + bool SelectAddrMode2Offset(SDOperand Op, SDOperand N, + SDOperand &Offset, SDOperand &Opc); + bool SelectAddrMode3(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset, SDOperand &Opc); + bool SelectAddrMode3Offset(SDOperand Op, SDOperand N, + SDOperand &Offset, SDOperand &Opc); + bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); -/// DAGIntCCToARMCC - Convert a DAG integer condition code to an ARM CC -static std::vector<unsigned> DAGIntCCToARMCC(ISD::CondCode CC) { - switch (CC) { - default: - assert(0 && "Unknown integer condition code!"); - case ISD::SETEQ: return make_vector<unsigned>(ARMCC::EQ, 0); - case ISD::SETNE: return make_vector<unsigned>(ARMCC::NE, 0); - case ISD::SETLT: return make_vector<unsigned>(ARMCC::LT, 0); - case ISD::SETLE: 
return make_vector<unsigned>(ARMCC::LE, 0); - case ISD::SETGT: return make_vector<unsigned>(ARMCC::GT, 0); - case ISD::SETGE: return make_vector<unsigned>(ARMCC::GE, 0); - case ISD::SETULT: return make_vector<unsigned>(ARMCC::CC, 0); - case ISD::SETULE: return make_vector<unsigned>(ARMCC::LS, 0); - case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0); - case ISD::SETUGE: return make_vector<unsigned>(ARMCC::CS, 0); - } + bool SelectAddrModePC(SDOperand Op, SDOperand N, SDOperand &Offset, + SDOperand &Label); + + bool SelectThumbAddrModeRR(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); + bool SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); + bool SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); + bool SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); + bool SelectThumbAddrModeSP(SDOperand Op, SDOperand N, SDOperand &Base, + SDOperand &Offset); + + bool SelectShifterOperandReg(SDOperand Op, SDOperand N, SDOperand &A, + SDOperand &B, SDOperand &C); + + // Include the pieces autogenerated from the target description. +#include "ARMGenDAGISel.inc" +}; } -std::vector<unsigned> ARMTargetLowering:: -getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT::ValueType VT) const { - if (Constraint.size() == 1) { - // FIXME: handling only r regs - switch (Constraint[0]) { - default: break; // Unknown constraint letter - - case 'r': // GENERAL_REGS - case 'R': // LEGACY_REGS - if (VT == MVT::i32) - return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11, - ARM::R12, ARM::R13, ARM::R14, 0); - break; +void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { + DEBUG(BB->dump()); - } - } + DAG.setRoot(SelectRoot(DAG.getRoot())); + DAG.RemoveDeadNodes(); - return std::vector<unsigned>(); + ScheduleAndEmitDAG(DAG); } -const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case ARMISD::CALL: return "ARMISD::CALL"; - case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; - case ARMISD::SELECT: return "ARMISD::SELECT"; - case ARMISD::CMP: return "ARMISD::CMP"; - case ARMISD::BR: return "ARMISD::BR"; - case ARMISD::FSITOS: return "ARMISD::FSITOS"; - case ARMISD::FTOSIS: return "ARMISD::FTOSIS"; - case ARMISD::FSITOD: return "ARMISD::FSITOD"; - case ARMISD::FTOSID: return "ARMISD::FTOSID"; - case ARMISD::FUITOS: return "ARMISD::FUITOS"; - case ARMISD::FTOUIS: return "ARMISD::FTOUIS"; - case ARMISD::FUITOD: return "ARMISD::FUITOD"; - case ARMISD::FTOUID: return "ARMISD::FTOUID"; - case ARMISD::FMRRD: return "ARMISD::FMRRD"; - case ARMISD::FMDRR: return "ARMISD::FMDRR"; - case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; +bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset, + SDOperand &Opc) { + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return true; } -} - -class ArgumentLayout { - std::vector<bool> is_reg; - std::vector<unsigned> pos; - 
std::vector<MVT::ValueType> types; -public: - ArgumentLayout(const std::vector<MVT::ValueType> &Types) { - types = Types; - - unsigned RegNum = 0; - unsigned StackOffset = 0; - for(std::vector<MVT::ValueType>::const_iterator I = Types.begin(); - I != Types.end(); - ++I) { - MVT::ValueType VT = *I; - assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64); - unsigned size = MVT::getSizeInBits(VT)/32; - - RegNum = ((RegNum + size - 1) / size) * size; - if (RegNum < 4) { - pos.push_back(RegNum); - is_reg.push_back(true); - RegNum += size; - } else { - unsigned bytes = size * 32/8; - StackOffset = ((StackOffset + bytes - 1) / bytes) * bytes; - pos.push_back(StackOffset); - is_reg.push_back(false); - StackOffset += bytes; + + // Match simple R +/- imm12 operands. + if (N.getOpcode() == ISD::ADD) + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getValue(); + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits. + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, RHSC, + ARM_AM::no_shift), + MVT::i32); + return true; + } else if (RHSC < 0 && RHSC > -0x1000) { + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::sub, -RHSC, + ARM_AM::no_shift), + MVT::i32); + return true; } } - } - unsigned getRegisterNum(unsigned argNum) { - assert(isRegister(argNum)); - return pos[argNum]; - } - unsigned getOffset(unsigned argNum) { - assert(isOffset(argNum)); - return pos[argNum]; - } - unsigned isRegister(unsigned argNum) { - assert(argNum < is_reg.size()); - return is_reg[argNum]; - } - unsigned isOffset(unsigned argNum) { - return !isRegister(argNum); - } - MVT::ValueType getType(unsigned argNum) { - assert(argNum < types.size()); - return types[argNum]; - } - unsigned getStackSize(void) { - int last = is_reg.size() - 1; - if (last < 0) - return 0; - if (isRegister(last)) - return 0; - return getOffset(last) + MVT::getSizeInBits(getType(last))/8; - } - int lastRegArg(void) { - int size = is_reg.size(); - int last = 0; - while(last < size && isRegister(last)) - last++; - last--; - return last; - } - int lastRegNum(void) { - int l = lastRegArg(); - if (l < 0) - return -1; - unsigned r = getRegisterNum(l); - MVT::ValueType t = getType(l); - assert(t == MVT::i32 || t == MVT::f32 || t == MVT::f64); - if (t == MVT::f64) - return r + 1; - return r; - } -}; - -// This transforms a ISD::CALL node into a -// callseq_star <- ARMISD:CALL <- callseq_end -// chain -static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) { - SDOperand Chain = Op.getOperand(0); - unsigned CallConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); - assert((CallConv == CallingConv::C || - CallConv == CallingConv::Fast) - && "unknown calling convention"); - SDOperand Callee = Op.getOperand(4); - unsigned NumOps = (Op.getNumOperands() - 5) / 2; - SDOperand StackPtr = DAG.getRegister(ARM::R13, MVT::i32); - static const unsigned regs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - std::vector<MVT::ValueType> Types; - for (unsigned i = 0; i < NumOps; ++i) { - MVT::ValueType VT = Op.getOperand(5+2*i).getValueType(); - Types.push_back(VT); - } - ArgumentLayout Layout(Types); - - unsigned NumBytes = Layout.getStackSize(); - - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, MVT::i32)); - - //Build a sequence of stores - std::vector<SDOperand> MemOpChains; - for (unsigned i = Layout.lastRegArg() + 1; i < NumOps; ++i) { - 
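The R +/- imm12 case above can be restated as a standalone folding rule; a sketch with an illustrative name (the real selector also falls back to a possibly shifted register offset when this fails):

// AddrMode2 offsets are encoded as a 12-bit magnitude plus an add/sub flag.
static bool foldAM2Imm(int C, bool &IsSub, unsigned &Imm12) {
  if (C >= 0 && C < 0x1000)  { IsSub = false; Imm12 = (unsigned)C;  return true; }
  if (C < 0  && C > -0x1000) { IsSub = true;  Imm12 = (unsigned)-C; return true; }
  return false; // out of range: use a (possibly shifted) register offset
}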
SDOperand Arg = Op.getOperand(5+2*i); - unsigned ArgOffset = Layout.getOffset(i); - SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - } - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - // Likewise ExternalSymbol -> TargetExternalSymbol. - assert(Callee.getValueType() == MVT::i32); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); - else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); - - // If this is a direct call, pass the chain and the callee. - assert (Callee.Val); - std::vector<SDOperand> Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into the appropriate regs. - SDOperand InFlag; - for (int i = 0, e = Layout.lastRegArg(); i <= e; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - unsigned RegNum = Layout.getRegisterNum(i); - unsigned Reg1 = regs[RegNum]; - MVT::ValueType VT = Layout.getType(i); - assert(VT == Arg.getValueType()); - assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64); - - // Add argument register to the end of the list so that it is known live - // into the call. - Ops.push_back(DAG.getRegister(Reg1, MVT::i32)); - if (VT == MVT::f64) { - unsigned Reg2 = regs[RegNum + 1]; - SDOperand SDReg1 = DAG.getRegister(Reg1, MVT::i32); - SDOperand SDReg2 = DAG.getRegister(Reg2, MVT::i32); - - Ops.push_back(DAG.getRegister(Reg2, MVT::i32)); - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag); - SDOperand Ops[] = {Chain, SDReg1, SDReg2, Arg, InFlag}; - Chain = DAG.getNode(ARMISD::FMRRD, VTs, Ops, InFlag.Val ? 5 : 4); + + // Otherwise this is R +/- [possibly shifted] R + ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getValue(); + Offset = N.getOperand(1).getOperand(0); } else { - if (VT == MVT::f32) - Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg); - Chain = DAG.getCopyToReg(Chain, Reg1, Arg, InFlag); + ShOpcVal = ARM_AM::no_shift; } - InFlag = Chain.getValue(1); } - - std::vector<MVT::ValueType> NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - - unsigned CallOpc = ARMISD::CALL; - if (InFlag.Val) - Ops.push_back(InFlag); - Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - std::vector<SDOperand> ResultVals; - NodeTys.clear(); - - // If the call has results, copy the values out of the ret val registers. 
- MVT::ValueType VT = Op.Val->getValueType(0); - if (VT != MVT::Other) { - assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64); - - SDOperand Value1 = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag); - Chain = Value1.getValue(1); - InFlag = Value1.getValue(2); - NodeTys.push_back(VT); - if (VT == MVT::i32) { - ResultVals.push_back(Value1); - if (Op.Val->getValueType(1) == MVT::i32) { - SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag); - Chain = Value2.getValue(1); - ResultVals.push_back(Value2); - NodeTys.push_back(VT); + + // Try matching (R shl C) + (R). + if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getValue(); + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShOpcVal = ARM_AM::no_shift; } } - if (VT == MVT::f32) { - SDOperand Value = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Value1); - ResultVals.push_back(Value); - } - if (VT == MVT::f64) { - SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag); - Chain = Value2.getValue(1); - SDOperand Value = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2); - ResultVals.push_back(Value); - } } - - Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, - DAG.getConstant(NumBytes, MVT::i32)); - NodeTys.push_back(MVT::Other); - - if (ResultVals.empty()) - return Chain; - - ResultVals.push_back(Chain); - SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0], - ResultVals.size()); - return Res.getValue(Op.ResNo); + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; } -static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) { - SDOperand Copy; - SDOperand Chain = Op.getOperand(0); - SDOperand R0 = DAG.getRegister(ARM::R0, MVT::i32); - SDOperand R1 = DAG.getRegister(ARM::R1, MVT::i32); - - switch(Op.getNumOperands()) { - default: - assert(0 && "Do not know how to return this many arguments!"); - abort(); - case 1: { - SDOperand LR = DAG.getRegister(ARM::R14, MVT::i32); - return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain); +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDOperand Op, SDOperand N, + SDOperand &Offset, SDOperand &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getValue(); + if (Val >= 0 && Val < 0x1000) { // 12 bits. 
+ Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; + } } - case 3: { - SDOperand Val = Op.getOperand(1); - assert(Val.getValueType() == MVT::i32 || - Val.getValueType() == MVT::f32 || - Val.getValueType() == MVT::f64); - if (Val.getValueType() == MVT::f64) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag); - SDOperand Ops[] = {Chain, R0, R1, Val}; - Copy = DAG.getNode(ARMISD::FMRRD, VTs, Ops, 4); + Offset = N; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + unsigned ShAmt = 0; + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShAmt = Sh->getValue(); + Offset = N.getOperand(0); } else { - if (Val.getValueType() == MVT::f32) - Val = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Val); - Copy = DAG.getCopyToReg(Chain, R0, Val, SDOperand()); - } - - if (DAG.getMachineFunction().liveout_empty()) { - DAG.getMachineFunction().addLiveOut(ARM::R0); - if (Val.getValueType() == MVT::f64) - DAG.getMachineFunction().addLiveOut(ARM::R1); - } - break; - } - case 5: - Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand()); - Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1)); - // If we haven't noted the R0+R1 are live out, do so now. - if (DAG.getMachineFunction().liveout_empty()) { - DAG.getMachineFunction().addLiveOut(ARM::R0); - DAG.getMachineFunction().addLiveOut(ARM::R1); + ShOpcVal = ARM_AM::no_shift; } - break; } - //We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag - return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; } -static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType PtrVT = Op.getValueType(); - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); - SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - return CPI; -} - -SDOperand LegalizeImmediate(uint32_t immediate, SelectionDAG &DAG, - bool canReturnConstant){ - SDOperand Shift = DAG.getTargetConstant(0, MVT::i32); - SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32); - std::vector<unsigned>immediatePieces = splitImmediate(immediate); - if (immediatePieces.size()>1){ - unsigned movInst = ARM::MOV; - unsigned orInst = ARM::ORR; - SDNode *node; - //try mvn - std::vector<unsigned>immediateNegPieces = splitImmediate(~immediate); - if (immediatePieces.size() > immediateNegPieces.size()) { - //use mvn/eor - movInst = ARM::MVN; - orInst = ARM::EOR; - immediatePieces = immediateNegPieces; - } - SDOperand n = DAG.getTargetConstant(immediatePieces[0], MVT::i32); - node = DAG.getTargetNode(movInst, MVT::i32, n, Shift, ShiftType); - std::vector<unsigned>::iterator it; - for (it=immediatePieces.begin()+1; it != immediatePieces.end(); ++it){ - n = DAG.getTargetConstant(*it, MVT::i32); - SDOperand ops[] = {SDOperand(node, 0), n, Shift, ShiftType}; - node = DAG.getTargetNode(orInst, MVT::i32, ops, 4); +bool ARMDAGToDAGISel::SelectAddrMode3(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset, + SDOperand &Opc) { + if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. 
+ Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } - return SDOperand(node, 0); - } else { - if (canReturnConstant) - return DAG.getTargetConstant(immediate, MVT::i32); - else { - SDOperand n = DAG.getTargetConstant(immediate, MVT::i32); - SDNode *node = DAG.getTargetNode(ARM::MOV, MVT::i32, n, Shift, - ShiftType); - return SDOperand(node, 0); + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getValue(); + if (RHSC >= 0 && RHSC < 256) { + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, RHSC), + MVT::i32); + return true; + } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed. + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, -RHSC), + MVT::i32); + return true; } } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); + return true; } -static SDOperand LowerConstantFP(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType VT = Op.getValueType(); - SDOperand Shift = DAG.getTargetConstant(0, MVT::i32); - SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32); - SDNode *node; - switch (VT) { - default: assert(0 && "VT!=f32 && VT!=f64"); - case MVT::f32: { - float val = cast<ConstantFPSDNode>(Op)->getValue(); - uint32_t i32_val = FloatToBits(val); - SDOperand c = LegalizeImmediate(i32_val, DAG, false); - node = DAG.getTargetNode(ARM::FMSR, MVT::f32, c); - break; - } - case MVT::f64: { - double val = cast<ConstantFPSDNode>(Op)->getValue(); - uint64_t i64_val = DoubleToBits(val); - SDOperand hi = LegalizeImmediate(Hi_32(i64_val), DAG, false); - SDOperand lo = LegalizeImmediate(Lo_32(i64_val), DAG, false); - node = DAG.getTargetNode(ARM::FMDRR, MVT::f64, lo, hi); - break; - } +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDOperand Op, SDOperand N, + SDOperand &Offset, SDOperand &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? 
ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getValue(); + if (Val >= 0 && Val < 256) { + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32); + return true; + } } - return SDOperand(node, 0); -} - -static SDOperand LowerGlobalAddress(SDOperand Op, - SelectionDAG &DAG) { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - int alignment = 2; - SDOperand CPAddr = DAG.getConstantPool(GV, MVT::i32, alignment); - return DAG.getLoad(MVT::i32, DAG.getEntryNode(), CPAddr, NULL, 0); -} -static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { - // vastart just stores the address of the VarArgsFrameIndex slot into the - // memory location argument. - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); - return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(), - SV->getOffset()); + Offset = N; + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32); + return true; } -static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, - int &VarArgsFrameIndex) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - SSARegMap *RegMap = MF.getSSARegMap(); - unsigned NumArgs = Op.Val->getNumValues()-1; - SDOperand Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; - static const unsigned REGS[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - std::vector<MVT::ValueType> Types(Op.Val->value_begin(), Op.Val->value_end() - 1); - ArgumentLayout Layout(Types); - - std::vector<SDOperand> ArgValues; - for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) { - MVT::ValueType VT = Types[ArgNo]; - SDOperand Value; - if (Layout.isRegister(ArgNo)) { - assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64); - unsigned RegNum = Layout.getRegisterNum(ArgNo); - unsigned Reg1 = REGS[RegNum]; - unsigned VReg1 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass); - SDOperand Value1 = DAG.getCopyFromReg(Root, VReg1, MVT::i32); - MF.addLiveIn(Reg1, VReg1); - if (VT == MVT::f64) { - unsigned Reg2 = REGS[RegNum + 1]; - unsigned VReg2 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass); - SDOperand Value2 = DAG.getCopyFromReg(Root, VReg2, MVT::i32); - MF.addLiveIn(Reg2, VReg2); - Value = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2); - } else { - Value = Value1; - if (VT == MVT::f32) - Value = DAG.getNode(ISD::BIT_CONVERT, VT, Value); - } - } else { - // If the argument is actually used, emit a load from the right stack - // slot. 
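As in SelectAddrMode2Offset() earlier, the add/sub direction here comes from the load or store's indexing mode; roughly (assembly shown for illustration only):

// PRE_INC  : ldr r0, [r1, #4]!   -> ARM_AM::add
// POST_INC : ldr r0, [r1], #4    -> ARM_AM::add
// PRE_DEC  : ldr r0, [r1, #-4]!  -> ARM_AM::sub
// POST_DEC : ldr r0, [r1], #-4   -> ARM_AM::sub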
- if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { - unsigned Offset = Layout.getOffset(ArgNo); - unsigned Size = MVT::getSizeInBits(VT)/8; - int FI = MFI->CreateFixedObject(Size, Offset); - SDOperand FIN = DAG.getFrameIndex(FI, VT); - Value = DAG.getLoad(VT, Root, FIN, NULL, 0); - } else { - Value = DAG.getNode(ISD::UNDEF, VT); - } +bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset) { + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); } - ArgValues.push_back(Value); + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; } - - unsigned NextRegNum = Layout.lastRegNum() + 1; - - if (isVarArg) { - //If this function is vararg we must store the remaing - //registers so that they can be acessed with va_start - VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8, - -16 + NextRegNum * 4); - - SmallVector<SDOperand, 4> MemOps; - for (unsigned RegNo = NextRegNum; RegNo < 4; ++RegNo) { - int RegOffset = - (4 - RegNo) * 4; - int FI = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8, - RegOffset); - SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); - - unsigned VReg = RegMap->createVirtualRegister(&ARM::IntRegsRegClass); - MF.addLiveIn(REGS[RegNo], VReg); - - SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32); - SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); - MemOps.push_back(Store); + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4. + RHSC >>= 2; + if (RHSC >= 0 && RHSC < 256) { + Base = N.getOperand(0); + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, RHSC), + MVT::i32); + return true; + } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed. + Base = N.getOperand(0); + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::sub,-RHSC), + MVT::i32); + return true; + } } - Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); } - - ArgValues.push_back(Root); - - // Return the new list of results. 
- std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(), - Op.Val->value_end()); - return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); -} - -static SDOperand GetCMP(ISD::CondCode CC, SDOperand LHS, SDOperand RHS, - SelectionDAG &DAG) { - MVT::ValueType vt = LHS.getValueType(); - assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64); - - SDOperand Cmp = DAG.getNode(ARMISD::CMP, MVT::Flag, LHS, RHS); - - if (vt != MVT::i32) - Cmp = DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp); - return Cmp; -} - -static std::vector<SDOperand> GetARMCC(ISD::CondCode CC, MVT::ValueType vt, - SelectionDAG &DAG) { - assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64); - std::vector<unsigned> vcc; - if (vt == MVT::i32) - vcc = DAGIntCCToARMCC(CC); - else - vcc = DAGFPCCToARMCC(CC); - - std::vector<unsigned>::iterator it; - std::vector<SDOperand> result; - for( it = vcc.begin(); it != vcc.end(); it++ ) - result.push_back(DAG.getConstant(*it,MVT::i32)); - return result; -} - -static bool isUInt8Immediate(uint32_t x) { - return x < (1 << 8); -} - -static uint32_t rotateL(uint32_t x) { - uint32_t bit31 = (x & (1 << 31)) >> 31; - uint32_t t = x << 1; - return t | bit31; + + Base = N; + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; } -static bool isRotInt8Immediate(uint32_t x) { - int r; - for (r = 0; r < 16; r++) { - if (isUInt8Immediate(x)) - return true; - x = rotateL(rotateL(x)); +bool ARMDAGToDAGISel::SelectAddrModePC(SDOperand Op, SDOperand N, + SDOperand &Offset, SDOperand &Label) { + if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { + Offset = N.getOperand(0); + SDOperand N1 = N.getOperand(1); + Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getValue(), + MVT::i32); + return true; } return false; } -static void LowerCMP(SDOperand &Cmp, std::vector<SDOperand> &ARMCC, - SDOperand LHS, SDOperand RHS, ISD::CondCode CC, - SelectionDAG &DAG) { - MVT::ValueType vt = LHS.getValueType(); - if (vt == MVT::i32) { - assert(!isa<ConstantSDNode>(LHS)); - if (ConstantSDNode *SD_C = dyn_cast<ConstantSDNode>(RHS.Val)) { - uint32_t C = SD_C->getValue(); - - uint32_t NC; - switch(CC) { - default: - NC = C; break; - case ISD::SETLT: - case ISD::SETULT: - case ISD::SETGE: - case ISD::SETUGE: - NC = C - 1; break; - case ISD::SETLE: - case ISD::SETULE: - case ISD::SETGT: - case ISD::SETUGT: - NC = C + 1; break; - } - - ISD::CondCode NCC; - switch(CC) { - default: - NCC = CC; break; - case ISD::SETLT: - NCC = ISD::SETLE; break; - case ISD::SETULT: - NCC = ISD::SETULE; break; - case ISD::SETGE: - NCC = ISD::SETGT; break; - case ISD::SETUGE: - NCC = ISD::SETUGT; break; - case ISD::SETLE: - NCC = ISD::SETLT; break; - case ISD::SETULE: - NCC = ISD::SETULT; break; - case ISD::SETGT: - NCC = ISD::SETGE; break; - case ISD::SETUGT: - NCC = ISD::SETUGE; break; - } - - if (!isRotInt8Immediate(C) && isRotInt8Immediate(NC)) { - RHS = DAG.getConstant(NC, MVT::i32); - CC = NCC; - } - } - } - Cmp = GetCMP(CC, LHS, RHS, DAG); - ARMCC = GetARMCC(CC, vt, DAG); +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset){ + if (N.getOpcode() != ISD::ADD) + return false; + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; } -static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { - SDOperand LHS = Op.getOperand(0); - SDOperand RHS = Op.getOperand(1); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - SDOperand TrueVal = Op.getOperand(2); - SDOperand 
FalseVal = Op.getOperand(3); - SDOperand Cmp; - std::vector<SDOperand> ARMCC; - LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG); - - SDOperand Aux = FalseVal; - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); - std::vector<SDOperand>::iterator it; - for (it = ARMCC.begin(); it != ARMCC.end(); ++it){ - SDOperand Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1); - SDOperand Ops[] = {TrueVal, Aux, *it, Flag}; - Aux = DAG.getNode(ARMISD::SELECT, VTs, Ops, 4); +static bool SelectThumbAddrModeRI5(SDOperand N, unsigned Scale, + TargetLowering &TLI, SelectionDAG *CurDAG, + SDOperand &Base, SDOperand &Offset) { + if (N.getOpcode() == ISD::FrameIndex) + return false; + + if (N.getOpcode() != ISD::ADD) { + Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; } - return Aux; -} -static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG) { - SDOperand Chain = Op.getOperand(0); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDOperand LHS = Op.getOperand(2); - SDOperand RHS = Op.getOperand(3); - SDOperand Dest = Op.getOperand(4); - SDOperand Cmp; - std::vector<SDOperand> ARMCC; - LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG); - - SDOperand Aux = Chain; - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag); - std::vector<SDOperand>::iterator it; - for (it = ARMCC.begin(); it != ARMCC.end(); it++){ - SDOperand Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1); - SDOperand Ops[] = {Aux, Dest, *it, Flag}; - Aux = DAG.getNode(ARMISD::BR, VTs, Ops, 4); + // If the RHS is + imm5 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getValue(); + if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied. + RHSC /= Scale; + if (RHSC >= 0 && RHSC < 32) { + Base = N.getOperand(0); + Offset = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } } - return Aux; -} -static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { - SDOperand IntVal = Op.getOperand(0); - assert(IntVal.getValueType() == MVT::i32); - MVT::ValueType vt = Op.getValueType(); - assert(vt == MVT::f32 || - vt == MVT::f64); - - SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal); - ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FSITOS : ARMISD::FSITOD; - return DAG.getNode(op, vt, Tmp); -} - -static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { - assert(Op.getValueType() == MVT::i32); - SDOperand FloatVal = Op.getOperand(0); - MVT::ValueType vt = FloatVal.getValueType(); - assert(vt == MVT::f32 || vt == MVT::f64); - - ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FTOSIS : ARMISD::FTOSID; - SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal); - return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp); -} - -static SDOperand LowerUINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { - SDOperand IntVal = Op.getOperand(0); - assert(IntVal.getValueType() == MVT::i32); - MVT::ValueType vt = Op.getValueType(); - assert(vt == MVT::f32 || - vt == MVT::f64); - - SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal); - ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FUITOS : ARMISD::FUITOD; - return DAG.getNode(op, vt, Tmp); -} - -static SDOperand LowerFP_TO_UINT(SDOperand Op, SelectionDAG &DAG) { - assert(Op.getValueType() == MVT::i32); - SDOperand FloatVal = Op.getOperand(0); - MVT::ValueType vt = FloatVal.getValueType(); - assert(vt == MVT::f32 || vt == MVT::f64); - - ARMISD::NodeType op = vt == MVT::f32 ? 
ARMISD::FTOUIS : ARMISD::FTOUID; - SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal); - return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp); -} - -SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { - switch (Op.getOpcode()) { - default: - assert(0 && "Should not custom lower this!"); - abort(); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG); - case ISD::ConstantFP: - return LowerConstantFP(Op, DAG); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::FP_TO_SINT: - return LowerFP_TO_SINT(Op, DAG); - case ISD::SINT_TO_FP: - return LowerSINT_TO_FP(Op, DAG); - case ISD::FP_TO_UINT: - return LowerFP_TO_UINT(Op, DAG); - case ISD::UINT_TO_FP: - return LowerUINT_TO_FP(Op, DAG); - case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); - case ISD::CALL: - return LowerCALL(Op, DAG); - case ISD::RET: - return LowerRET(Op, DAG); - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG); - case ISD::BR_CC: - return LowerBR_CC(Op, DAG); - case ISD::VASTART: - return LowerVASTART(Op, DAG, VarArgsFrameIndex); - } + return false; } -//===----------------------------------------------------------------------===// -// Instruction Selector Implementation -//===----------------------------------------------------------------------===// - -//===--------------------------------------------------------------------===// -/// ARMDAGToDAGISel - ARM specific code to select ARM machine -/// instructions for SelectionDAG operations. -/// -namespace { -class ARMDAGToDAGISel : public SelectionDAGISel { - ARMTargetLowering Lowering; - -public: - ARMDAGToDAGISel(TargetMachine &TM) - : SelectionDAGISel(Lowering), Lowering(TM) { - } - - SDNode *Select(SDOperand Op); - virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); - bool SelectAddrMode1(SDOperand Op, SDOperand N, SDOperand &Arg, - SDOperand &Shift, SDOperand &ShiftType); - bool SelectAddrMode1a(SDOperand Op, SDOperand N, SDOperand &Arg, - SDOperand &Shift, SDOperand &ShiftType); - bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Arg, - SDOperand &Offset); - bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Arg, - SDOperand &Offset); - - // Include the pieces autogenerated from the target description. 
-#include "ARMGenDAGISel.inc" -}; - -void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { - DEBUG(BB->dump()); - - DAG.setRoot(SelectRoot(DAG.getRoot())); - DAG.RemoveDeadNodes(); - - ScheduleAndEmitDAG(DAG); +bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset){ + return SelectThumbAddrModeRI5(N, 1, TLI, CurDAG, Base, Offset); } -static bool isInt12Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) - return false; - - int32_t t = cast<ConstantSDNode>(N)->getValue(); - int max = 1<<12; - int min = -max; - if (t > min && t < max) { - Imm = t; - return true; - } - else - return false; +bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset){ + return SelectThumbAddrModeRI5(N, 2, TLI, CurDAG, Base, Offset); } -static bool isInt12Immediate(SDOperand Op, short &Imm) { - return isInt12Immediate(Op.Val, Imm); +bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset){ + return SelectThumbAddrModeRI5(N, 4, TLI, CurDAG, Base, Offset); } -bool ARMDAGToDAGISel::SelectAddrMode1(SDOperand Op, - SDOperand N, - SDOperand &Arg, - SDOperand &Shift, - SDOperand &ShiftType) { - switch(N.getOpcode()) { - case ISD::Constant: { - uint32_t val = cast<ConstantSDNode>(N)->getValue(); - Shift = CurDAG->getTargetConstant(0, MVT::i32); - ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32); - Arg = LegalizeImmediate(val, *CurDAG, true); - return true; - } - - case ISD::SRA: - Arg = N.getOperand(0); - Shift = N.getOperand(1); - ShiftType = CurDAG->getTargetConstant(ARMShift::ASR, MVT::i32); - return true; - case ISD::SRL: - Arg = N.getOperand(0); - Shift = N.getOperand(1); - ShiftType = CurDAG->getTargetConstant(ARMShift::LSR, MVT::i32); - return true; - case ISD::SHL: - Arg = N.getOperand(0); - Shift = N.getOperand(1); - ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32); +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDOperand Op, SDOperand N, + SDOperand &Base, SDOperand &Offset) { + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } - - Arg = N; - Shift = CurDAG->getTargetConstant(0, MVT::i32); - ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32); - return true; + + return false; } -bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N, - SDOperand &Arg, SDOperand &Offset) { - //TODO: complete and cleanup! - SDOperand Zero = CurDAG->getTargetConstant(0, MVT::i32); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { - Arg = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = Zero; - return true; - } - if (N.getOpcode() == ISD::ADD) { - short imm = 0; - if (isInt12Immediate(N.getOperand(1), imm)) { - Offset = CurDAG->getTargetConstant(imm, MVT::i32); - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { - Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); - } else { - Arg = N.getOperand(0); - } - return true; // [r+i] - } +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDOperand Op, + SDOperand N, + SDOperand &BaseReg, + SDOperand &ShReg, + SDOperand &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. 
That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getValue() & 31; + } else { + ShReg = N.getOperand(1); } - Offset = Zero; - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) - Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); - else - Arg = N; + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); return true; } -bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op, - SDOperand N, SDOperand &Arg, - SDOperand &Offset) { - //TODO: detect offset - Offset = CurDAG->getTargetConstant(0, MVT::i32); - Arg = N; - return true; -} SDNode *ARMDAGToDAGISel::Select(SDOperand Op) { SDNode *N = Op.Val; + unsigned Opcode = N->getOpcode(); + + if (Opcode >= ISD::BUILTIN_OP_END && Opcode < ARMISD::FIRST_NUMBER) + return NULL; // Already selected. switch (N->getOpcode()) { - default: - return SelectCode(Op); + default: break; + case ISD::Constant: { + unsigned Val = cast<ConstantSDNode>(N)->getValue(); + bool UseCP = true; + if (Subtarget->isThumb()) + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + if (UseCP) { + SDOperand CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + TLI.getPointerTy()); + SDOperand Ops[] = { + CPIdx, + CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32), + CurDAG->getEntryNode() + }; + SDNode *ResNode = + CurDAG->getTargetNode(ARM::LDR, MVT::i32, MVT::Other, Ops, 4); + ReplaceUses(Op, SDOperand(ResNode, 0)); + return NULL; + } + + // Other cases are autogenerated. break; + } case ISD::FrameIndex: { + // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDOperand Ops[] = {CurDAG->getTargetFrameIndex(FI, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32), - CurDAG->getTargetConstant(0, MVT::i32), - CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32)}; + unsigned Opc = Subtarget->isThumb() ? ARM::tADDrSPi : ARM::ADDri; + SDOperand TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } + case ISD::MUL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RHSV = C->getValue(); + if (!RHSV) break; + if (isPowerOf2_32(RHSV-1)) { // 2^n+1? + SDOperand V = Op.getOperand(0); + AddToISelQueue(V); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1)); + SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32) + }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 4); + } + if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
+ SDOperand V = Op.getOperand(0); + AddToISelQueue(V); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1)); + SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32) + }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 4); + } + } + break; + case ARMISD::FMRRD: + AddToISelQueue(Op.getOperand(0)); + return CurDAG->getTargetNode(ARM::FMRRD, MVT::i32, MVT::i32, + Op.getOperand(0)); + case ARMISD::MULHILOU: + AddToISelQueue(Op.getOperand(0)); + AddToISelQueue(Op.getOperand(1)); + return CurDAG->getTargetNode(ARM::UMULL, MVT::i32, MVT::i32, + Op.getOperand(0), Op.getOperand(1)); + case ARMISD::MULHILOS: + AddToISelQueue(Op.getOperand(0)); + AddToISelQueue(Op.getOperand(1)); + return CurDAG->getTargetNode(ARM::SMULL, MVT::i32, MVT::i32, + Op.getOperand(0), Op.getOperand(1)); + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + MVT::ValueType LoadedVT = LD->getLoadedVT(); + if (AM != ISD::UNINDEXED) { + SDOperand Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } - return CurDAG->SelectNodeTo(N, ARM::ADD, MVT::i32, Ops, - sizeof(Ops)/sizeof(SDOperand)); + if (Match) { + SDOperand Chain = LD->getChain(); + SDOperand Base = LD->getBasePtr(); + AddToISelQueue(Chain); + AddToISelQueue(Base); + AddToISelQueue(Offset); + SDOperand Ops[] = { Base, Offset, AMOpc, Chain }; + return CurDAG->getTargetNode(Opcode, MVT::i32, MVT::i32, + MVT::Other, Ops, 4); + } + } + // Other cases are autogenerated. break; } } -} -} // end anonymous namespace + return SelectCode(Op); +} /// createARMISelDag - This pass converts a legalized DAG into a /// ARM-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createARMISelDag(TargetMachine &TM) { +FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) { return new ARMDAGToDAGISel(TM); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp new file mode 100644 index 0000000000..f4cba4b336 --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -0,0 +1,1414 @@ +//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Evan Cheng and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SSARegMap.h" +#include "llvm/ADT/VectorExtras.h" +using namespace llvm; + +ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) + : TargetLowering(TM), ARMPCLabelIndex(0) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + // Uses VFP for Thumb libfuncs if available. + if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + // Single-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); + setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); + setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); + setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); + + // Double-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); + setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); + setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); + setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); + + // Single-precision comparisons. + setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); + setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); + setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); + setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); + setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); + setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); + setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); + + // Double-precision comparisons. + setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); + setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); + setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); + setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); + setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); + setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); + setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); + + // Conversions between floating types. + setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); + + // Integer to floating-point conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: e.g. + // __floatunsidf vs. __floatunssidfvfp. 
+  setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+  setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+  setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+  setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+  }
+
+  addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+  if (Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+  }
+
+  // ARM does not have f32 extending load.
+  setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+  // ARM supports all 4 flavors of integer indexed load / store.
+  for (unsigned im = (unsigned)ISD::PRE_INC;
+       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+    setIndexedLoadAction(im,  MVT::i1,  Legal);
+    setIndexedLoadAction(im,  MVT::i8,  Legal);
+    setIndexedLoadAction(im,  MVT::i16, Legal);
+    setIndexedLoadAction(im,  MVT::i32, Legal);
+    setIndexedStoreAction(im, MVT::i1,  Legal);
+    setIndexedStoreAction(im, MVT::i8,  Legal);
+    setIndexedStoreAction(im, MVT::i16, Legal);
+    setIndexedStoreAction(im, MVT::i32, Legal);
+  }
+
+  // i64 operation support.
+  if (Subtarget->isThumb()) {
+    setOperationAction(ISD::MUL,   MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::i32, Expand);
+    setOperationAction(ISD::MULHS, MVT::i32, Expand);
+  } else {
+    setOperationAction(ISD::MUL,   MVT::i64, Custom);
+    setOperationAction(ISD::MULHU, MVT::i32, Custom);
+    if (!Subtarget->hasV6Ops())
+      setOperationAction(ISD::MULHS, MVT::i32, Custom);
+  }
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL, MVT::i64, Custom);
+  setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+  // ARM does not have ROTL.
+  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  if (!Subtarget->hasV5TOps())
+    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+  // These are expanded into libcalls.
+  setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+  // Support label-based line numbers.
+  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+  // FIXME - use subtarget debug flags
+  if (Subtarget->isDarwin())
+    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
+
+  setOperationAction(ISD::RET,           MVT::Other, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
+  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
+
+  // Expand mem operations generically.
+  setOperationAction(ISD::MEMSET,  MVT::Other, Expand);
+  setOperationAction(ISD::MEMCPY,  MVT::Other, Expand);
+  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
+
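ARM cores of this era have no integer divide instruction, which is why SDIV/UDIV/SREM/UREM above are marked Expand: the legalizer turns them into runtime calls (__udivsi3 and friends). A minimal standalone sketch of the shift-and-subtract algorithm such a helper has to implement (an illustration, not part of this patch):

    #include <cstdint>

    // Long division, one quotient bit per iteration; assumes d != 0.
    uint32_t udiv32(uint32_t n, uint32_t d) {
      uint32_t q = 0, r = 0;
      for (int i = 31; i >= 0; --i) {
        r = (r << 1) | ((n >> i) & 1);   // bring down the next dividend bit
        if (r >= d) {                    // trial subtraction
          r -= d;
          q |= 1u << i;
        }
      }
      return q;                          // the remainder r would serve UREM
    }

+  // Use the default implementation.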
+ setOperationAction(ISD::VASTART , MVT::Other, Expand); + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + + if (!Subtarget->hasV6Ops()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + } + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (Subtarget->hasVFP2() && !Subtarget->isThumb()) + // Turn f64->i64 into FMRRD iff target supports vfp2. + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + + setOperationAction(ISD::SETCC , MVT::i32, Expand); + setOperationAction(ISD::SETCC , MVT::f32, Expand); + setOperationAction(ISD::SETCC , MVT::f64, Expand); + setOperationAction(ISD::SELECT , MVT::i32, Expand); + setOperationAction(ISD::SELECT , MVT::f32, Expand); + setOperationAction(ISD::SELECT , MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND , MVT::Other, Expand); + setOperationAction(ISD::BR_CC , MVT::i32, Custom); + setOperationAction(ISD::BR_CC , MVT::f32, Custom); + setOperationAction(ISD::BR_CC , MVT::f64, Custom); + setOperationAction(ISD::BR_JT , MVT::Other, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + // FP Constants can't be immediates. + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + + // We don't support sin/cos/fmod/copysign + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + + // int <-> fp are custom expanded into bit_convert + ARMISD ops. 
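A scalar restatement of that custom expansion, for intuition (a sketch, not part of the patch): the integer's raw bits are first placed in an FP register unchanged (the BIT_CONVERT), and the VFP convert then reinterprets and converts them in place.

    #include <cstdint>
    #include <cstring>

    float emulate_sint_to_fp(int32_t x) {
      float s;                            // stands in for an S register
      std::memcpy(&s, &x, sizeof s);      // ISD::BIT_CONVERT: bits unchanged
      int32_t payload;
      std::memcpy(&payload, &s, sizeof payload);
      return (float)payload;              // ARMISD::SITOF: convert in place
    }
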
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + + setStackPointerRegisterToSaveRestore(ARM::SP); + + setSchedulingPreference(SchedulingForRegPressure); + computeRegisterProperties(); +} + + +const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperCall: return "ARMISD::WrapperCall"; + case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tCALL: return "ARMISD::tCALL"; + case ARMISD::BRCOND: return "ARMISD::BRCOND"; + case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CNEG: return "ARMISD::CNEG"; + + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + case ARMISD::MULHILOU: return "ARMISD::MULHILOU"; + case ARMISD::MULHILOS: return "ARMISD::MULHILOS"; + + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; + case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; + case ARMISD::RRX: return "ARMISD::RRX"; + + case ARMISD::FMRRD: return "ARMISD::FMRRD"; + case ARMISD::FMDRR: return "ARMISD::FMDRR"; + } +} + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + + +/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC +static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown condition code!"); + case ISD::SETNE: return ARMCC::NE; + case ISD::SETEQ: return ARMCC::EQ; + case ISD::SETGT: return ARMCC::GT; + case ISD::SETGE: return ARMCC::GE; + case ISD::SETLT: return ARMCC::LT; + case ISD::SETLE: return ARMCC::LE; + case ISD::SETUGT: return ARMCC::HI; + case ISD::SETUGE: return ARMCC::HS; + case ISD::SETULT: return ARMCC::LO; + case ISD::SETULE: return ARMCC::LS; + } +} + +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It +/// returns true if the operands should be inverted to form the proper +/// comparison. 
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+                        ARMCC::CondCodes &CondCode2) {
+  bool Invert = false;
+  CondCode2 = ARMCC::AL;
+  switch (CC) {
+  default: assert(0 && "Unknown FP condition!");
+  case ISD::SETEQ:
+  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+  case ISD::SETGT:
+  case ISD::SETOGT: CondCode = ARMCC::GT; break;
+  case ISD::SETGE:
+  case ISD::SETOGE: CondCode = ARMCC::GE; break;
+  case ISD::SETOLT: CondCode = ARMCC::MI; break;
+  case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+  case ISD::SETO:   CondCode = ARMCC::VC; break;
+  case ISD::SETUO:  CondCode = ARMCC::VS; break;
+  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+  case ISD::SETUGT: CondCode = ARMCC::HI; break;
+  case ISD::SETUGE: CondCode = ARMCC::PL; break;
+  case ISD::SETLT:
+  case ISD::SETULT: CondCode = ARMCC::LT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: CondCode = ARMCC::LE; break;
+  case ISD::SETNE:
+  case ISD::SETUNE: CondCode = ARMCC::NE; break;
+  }
+  return Invert;
+}
+
+static void
+HowToPassArgument(MVT::ValueType ObjectVT,
+                  unsigned NumGPRs, unsigned &ObjSize, unsigned &ObjGPRs) {
+  ObjSize = 0;
+  ObjGPRs = 0;
+
+  switch (ObjectVT) {
+  default: assert(0 && "Unhandled argument type!");
+  case MVT::i32:
+  case MVT::f32:
+    if (NumGPRs < 4)
+      ObjGPRs = 1;
+    else
+      ObjSize = 4;
+    break;
+  case MVT::i64:
+  case MVT::f64:
+    if (NumGPRs < 3)
+      ObjGPRs = 2;
+    else if (NumGPRs == 3) {
+      ObjGPRs = 1;
+      ObjSize = 4;
+    } else
+      ObjSize = 8;
+  }
+}
+
+// This transforms an ISD::CALL node into a
+// callseq_start <- ARMISD::CALL <- callseq_end
+// chain
+SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType RetVT = Op.Val->getValueType(0);
+  SDOperand Chain    = Op.getOperand(0);
+  unsigned CallConv  = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+  assert((CallConv == CallingConv::C ||
+          CallConv == CallingConv::CSRet ||
+          CallConv == CallingConv::Fast) && "unknown calling convention");
+  SDOperand Callee   = Op.getOperand(4);
+  unsigned NumOps    = (Op.getNumOperands() - 5) / 2;
+  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
+  unsigned NumGPRs = 0;     // GPRs used for parameter passing.
+
+  // Count how many bytes are to be pushed on the stack.
+  unsigned NumBytes = 0;
+
+  // Add up all the space actually used.
+  for (unsigned i = 0; i < NumOps; ++i) {
+    unsigned ObjSize = 0;
+    unsigned ObjGPRs = 0;
+    MVT::ValueType ObjectVT = Op.getOperand(5+2*i).getValueType();
+    HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs);
+    NumBytes += ObjSize;
+    NumGPRs += ObjGPRs;
+  }
+
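The register/stack split computed by HowToPassArgument above boils down to: the first four argument words ride in r0-r3, a 64-bit value takes a pair (possibly splitting across r3 and the stack), and anything left over spills. A standalone restatement (a sketch; the names are illustrative, and 8-byte alignment of 64-bit values is ignored, matching the code above):

    #include <algorithm>
    #include <utility>

    // Returns {words passed in r0-r3, bytes passed on the stack}.
    std::pair<unsigned, unsigned> splitArg(unsigned argBytes,
                                           unsigned gprsAlreadyUsed) {
      unsigned words  = argBytes / 4;         // 1 for i32/f32, 2 for i64/f64
      unsigned avail  = 4 - gprsAlreadyUsed;  // remaining of r0-r3
      unsigned inRegs = std::min(words, avail);
      return {inRegs, (words - inRegs) * 4};
    }

+  // Adjust the stack pointer for the new arguments...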
+ // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, MVT::i32)); + + SDOperand StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + + static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + NumGPRs = 0; + std::vector<std::pair<unsigned, SDOperand> > RegsToPass; + std::vector<SDOperand> MemOpChains; + for (unsigned i = 0; i != NumOps; ++i) { + SDOperand Arg = Op.getOperand(5+2*i); + MVT::ValueType ArgVT = Arg.getValueType(); + + unsigned ObjSize = 0; + unsigned ObjGPRs = 0; + HowToPassArgument(ArgVT, NumGPRs, ObjSize, ObjGPRs); + if (ObjGPRs > 0) { + switch (ArgVT) { + default: assert(0 && "Unexpected ValueType for argument!"); + case MVT::i32: + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg)); + break; + case MVT::f32: + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], + DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg))); + break; + case MVT::i64: { + SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg, + DAG.getConstant(0, getPointerTy())); + SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg, + DAG.getConstant(1, getPointerTy())); + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo)); + if (ObjGPRs == 2) + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi)); + else { + SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Hi, PtrOff, NULL, 0)); + } + break; + } + case MVT::f64: { + SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, + DAG.getVTList(MVT::i32, MVT::i32), + &Arg, 1); + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt)); + if (ObjGPRs == 2) + RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], + Cvt.getValue(1))); + else { + SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Cvt.getValue(1), PtrOff, + NULL, 0)); + } + break; + } + } + } else { + assert(ObjSize != 0); + SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + } + + NumGPRs += ObjGPRs; + ArgOffset += ObjSize; + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDOperand InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. 
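For the f64 argument case above, ARMISD::FMRRD models the fmrrd instruction: the double's 64 raw bits move unchanged into two core registers. In plain C (a sketch, assuming the usual little-endian word order, low word first):

    #include <cstdint>
    #include <cstring>

    void emulate_fmrrd(double d, uint32_t &lo, uint32_t &hi) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);  // raw bits, no conversion
      lo = (uint32_t)bits;                  // -> first GPR (e.g. r0)
      hi = (uint32_t)(bits >> 32);          // -> second GPR (e.g. r1)
    }
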
+ bool isDirect = false; + bool isARMFunc = false; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); + isDirect = true; + bool isExt = (GV->isExternal() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + bool isStub = (isExt && Subtarget->isDarwin()) && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // Wrap it since tBX takes a register source operand. + if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) + Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + isDirect = true; + bool isStub = Subtarget->isDarwin() && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // Wrap it since tBX takes a register source operand. + if (!Subtarget->hasV5TOps() && Subtarget->isThumb()) + Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee); + } + + std::vector<MVT::ValueType> NodeTys; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + std::vector<SDOperand> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // FIXME: handle tail calls differently. + unsigned CallOpc; + if (Subtarget->isThumb()) { + if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc)) + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + } else { + CallOpc = (isDirect || Subtarget->hasV5TOps()) + ? ARMISD::CALL : ARMISD::CALL_NOLINK; + } + if (InFlag.Val) + Ops.push_back(InFlag); + Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + SDOperand CSOps[] = { Chain, DAG.getConstant(NumBytes, MVT::i32), InFlag }; + Chain = DAG.getNode(ISD::CALLSEQ_END, + DAG.getNodeValueTypes(MVT::Other, MVT::Flag), + ((RetVT != MVT::Other) ? 2 : 1), CSOps, 3); + if (RetVT != MVT::Other) + InFlag = Chain.getValue(1); + + std::vector<SDOperand> ResultVals; + NodeTys.clear(); + + // If the call has results, copy the values out of the ret val registers. + switch (RetVT) { + default: assert(0 && "Unexpected ret value!"); + case MVT::Other: + break; + case MVT::i32: + Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + if (Op.Val->getValueType(1) == MVT::i32) { + // Returns a i64 value. 
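The CallOpc selection earlier in this function, restated as a standalone table (a sketch): the BX-based CALL_NOLINK form exists because pre-v5T cores have no BLX, so an interworking call cannot both link and switch instruction sets in one instruction.

    enum CallKind { Call, ThumbCall, CallNoLink };

    CallKind pickCallOpc(bool thumbCaller, bool hasV5T,
                         bool isDirect, bool isARMFunc) {
      if (thumbCaller) {
        if (!hasV5T && (!isDirect || isARMFunc))
          return CallNoLink;                  // mode may change, no BLX
        return isARMFunc ? Call : ThumbCall;  // BLX / BL
      }
      return (isDirect || hasV5T) ? Call : CallNoLink;
    }
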
+      Chain = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32,
+                                 Chain.getValue(2)).getValue(1);
+      ResultVals.push_back(Chain.getValue(0));
+      NodeTys.push_back(MVT::i32);
+    }
+    NodeTys.push_back(MVT::i32);
+    break;
+  case MVT::f32:
+    Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+    ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, MVT::f32,
+                                     Chain.getValue(0)));
+    NodeTys.push_back(MVT::f32);
+    break;
+  case MVT::f64: {
+    SDOperand Lo = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
+    SDOperand Hi = DAG.getCopyFromReg(Lo, ARM::R1, MVT::i32, Lo.getValue(2));
+    ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, MVT::f64, Lo, Hi));
+    NodeTys.push_back(MVT::f64);
+    break;
+  }
+  }
+
+  NodeTys.push_back(MVT::Other);
+
+  if (ResultVals.empty())
+    return Chain;
+
+  ResultVals.push_back(Chain);
+  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
+                              ResultVals.size());
+  return Res.getValue(Op.ResNo);
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+  SDOperand Copy;
+  SDOperand Chain = Op.getOperand(0);
+  switch(Op.getNumOperands()) {
+  default:
+    assert(0 && "Do not know how to return this many arguments!");
+    abort();
+  case 1: {
+    SDOperand LR = DAG.getRegister(ARM::LR, MVT::i32);
+    return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
+  }
+  case 3:
+    Op = Op.getOperand(1);
+    if (Op.getValueType() == MVT::f32) {
+      Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+    } else if (Op.getValueType() == MVT::f64) {
+      // Recursively legalize f64 -> i64.
+      Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Op);
+      return DAG.getNode(ISD::RET, MVT::Other, Chain, Op,
+                         DAG.getConstant(0, MVT::i32));
+    }
+    Copy = DAG.getCopyToReg(Chain, ARM::R0, Op, SDOperand());
+    if (DAG.getMachineFunction().liveout_empty())
+      DAG.getMachineFunction().addLiveOut(ARM::R0);
+    break;
+  case 5:
+    Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
+    Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
+    // If we haven't noted the R0+R1 are live out, do so now.
+    if (DAG.getMachineFunction().liveout_empty()) {
+      DAG.getMachineFunction().addLiveOut(ARM::R0);
+      DAG.getMachineFunction().addLiveOut(ARM::R1);
+    }
+    break;
+  }
+
+  // We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag.
+  return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVri.
+static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType PtrVT = Op.getValueType();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  SDOperand Res;
+  if (CP->isMachineConstantPoolEntry())
+    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                    CP->getAlignment());
+  else
+    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+                                    CP->getAlignment());
+  return DAG.getNode(ARMISD::Wrapper, MVT::i32, Res);
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+/// even in dynamic-no-pic mode.
+static bool GVIsIndirectSymbol(GlobalValue *GV) { + return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || + (GV->isExternal() && !GV->hasNotBeenReadFromBytecode())); +} + +SDOperand ARMTargetLowering::LowerGlobalAddress(SDOperand Op, + SelectionDAG &DAG) { + MVT::ValueType PtrVT = getPointerTy(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool IsIndirect = Subtarget->isDarwin() && GVIsIndirectSymbol(GV); + SDOperand CPAddr; + if (RelocM == Reloc::Static) + CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2); + else { + unsigned PCAdj = (RelocM != Reloc::PIC_) + ? 0 : (Subtarget->isThumb() ? 4 : 8); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + IsIndirect, PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2); + } + CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); + + SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0); + SDOperand Chain = Result.getValue(1); + + if (RelocM == Reloc::PIC_) { + SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel); + } + if (IsIndirect) + Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0); + + return Result; +} + +static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG, + unsigned VarArgsFrameIndex) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); + return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(), + SV->getOffset()); +} + +static SDOperand LowerFORMAL_ARGUMENT(SDOperand Op, SelectionDAG &DAG, + unsigned *vRegs, unsigned ArgNo, + unsigned &NumGPRs, unsigned &ArgOffset) { + MachineFunction &MF = DAG.getMachineFunction(); + MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); + SDOperand Root = Op.getOperand(0); + std::vector<SDOperand> ArgValues; + SSARegMap *RegMap = MF.getSSARegMap(); + + static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + unsigned ObjSize = 0; + unsigned ObjGPRs = 0; + HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs); + + SDOperand ArgValue; + if (ObjGPRs == 1) { + unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass); + MF.addLiveIn(GPRArgRegs[NumGPRs], VReg); + vRegs[NumGPRs] = VReg; + ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32); + if (ObjectVT == MVT::f32) + ArgValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, ArgValue); + } else if (ObjGPRs == 2) { + unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass); + MF.addLiveIn(GPRArgRegs[NumGPRs], VReg); + vRegs[NumGPRs] = VReg; + ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32); + + VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass); + MF.addLiveIn(GPRArgRegs[NumGPRs+1], VReg); + vRegs[NumGPRs+1] = VReg; + SDOperand ArgValue2 = DAG.getCopyFromReg(Root, VReg, MVT::i32); + + if (ObjectVT == MVT::i64) + ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2); + else + ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2); + } + NumGPRs += ObjGPRs; + + if (ObjSize) { + // If the argument is actually used, emit a load from the right stack + // slot. 
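The PCAdj constant chosen in LowerGlobalAddress above encodes an ARM quirk: reading pc yields the address of the current instruction plus 8 in ARM state (4 in Thumb) because of the pipeline, so the constant-pool entry stores the global's address minus (label + PCAdj), and ARMISD::PIC_ADD adds the pc back. As a standalone helper (a sketch):

    // Offset between an instruction's own address and the value it reads
    // from pc; zero when no PIC adjustment is needed.
    unsigned picAdjust(bool relocIsPIC, bool isThumb) {
      if (!relocIsPIC)
        return 0;
      return isThumb ? 4 : 8;
    }
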
+    if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
+      MachineFrameInfo *MFI = MF.getFrameInfo();
+      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
+      if (ObjGPRs == 0)
+        ArgValue = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+      else {
+        SDOperand ArgValue2 =
+          DAG.getLoad(MVT::i32, Root, FIN, NULL, 0);
+        if (ObjectVT == MVT::i64)
+          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+        else
+          ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+      }
+    } else {
+      // Don't emit a dead load.
+      ArgValue = DAG.getNode(ISD::UNDEF, ObjectVT);
+    }
+
+    ArgOffset += ObjSize;   // Move on to the next argument.
+  }
+
+  return ArgValue;
+}
+
+SDOperand
+ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
+  std::vector<SDOperand> ArgValues;
+  SDOperand Root = Op.getOperand(0);
+  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
+  unsigned NumGPRs = 0;     // GPRs used for parameter passing.
+  unsigned VRegs[4];
+
+  unsigned NumArgs = Op.Val->getNumValues()-1;
+  for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+    ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, VRegs, ArgNo,
+                                             NumGPRs, ArgOffset));
+
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+  if (isVarArg) {
+    static const unsigned GPRArgRegs[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3
+    };
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    SSARegMap *RegMap = MF.getSSARegMap();
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    unsigned VARegSaveSize = (4 - NumGPRs) * 4;
+    if (VARegSaveSize) {
+      // If this function is vararg, store any remaining integer argument regs
+      // to their spots on the stack so that they may be loaded by dereferencing
+      // the result of va_next.
+      AFI->setVarArgsRegSaveSize(VARegSaveSize);
+      VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset);
+      SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+      SmallVector<SDOperand, 4> MemOps;
+      for (; NumGPRs < 4; ++NumGPRs) {
+        unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+        MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+                          DAG.getConstant(4, getPointerTy()));
+      }
+      if (!MemOps.empty())
+        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                           &MemOps[0], MemOps.size());
+    } else
+      // This will point to the next argument passed via stack.
+      VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+  }
+
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+                                    Op.Val->value_end());
+  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDOperand Op) {
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+    return CFP->isExactlyValue(0.0);
+  else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
+    // Maybe this has already been legalized into the constant pool?
+    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+      SDOperand WrapperOp = Op.getOperand(1).getOperand(0);
+      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+        if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+          return CFP->isExactlyValue(0.0);
+    }
+  }
+  return false;
+}
+
+static bool isLegalCmpImmediate(int C, bool isThumb) {
+  return (isThumb && (C & ~255U) == 0) ||
+         (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
+
+/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
+                           SDOperand &ARMCC, SelectionDAG &DAG, bool isThumb) {
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.Val)) {
+    int C = (int)RHSC->getValue();
+    if (!isLegalCmpImmediate(C, isThumb)) {
+      // Constant does not fit, try adjusting it by one?
+      switch (CC) {
+      default: break;
+      case ISD::SETLT:
+      case ISD::SETULT:
+      case ISD::SETGE:
+      case ISD::SETUGE:
+        if (isLegalCmpImmediate(C-1, isThumb)) {
+          switch (CC) {
+          default: break;
+          case ISD::SETLT:  CC = ISD::SETLE;  break;
+          case ISD::SETULT: CC = ISD::SETULE; break;
+          case ISD::SETGE:  CC = ISD::SETGT;  break;
+          case ISD::SETUGE: CC = ISD::SETUGT; break;
+          }
+          RHS = DAG.getConstant(C-1, MVT::i32);
+        }
+        break;
+      case ISD::SETLE:
+      case ISD::SETULE:
+      case ISD::SETGT:
+      case ISD::SETUGT:
+        if (isLegalCmpImmediate(C+1, isThumb)) {
+          switch (CC) {
+          default: break;
+          case ISD::SETLE:  CC = ISD::SETLT;  break;
+          case ISD::SETULE: CC = ISD::SETULT; break;
+          case ISD::SETGT:  CC = ISD::SETGE;  break;
+          case ISD::SETUGT: CC = ISD::SETUGE; break;
+          }
+          RHS = DAG.getConstant(C+1, MVT::i32);
+        }
+        break;
+      }
+    }
+  }
+
+  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+  ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  return DAG.getNode(ARMISD::CMP, MVT::Flag, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+static SDOperand getVFPCmp(SDOperand LHS, SDOperand RHS, SelectionDAG &DAG) {
+  SDOperand Cmp;
+  if (!isFloatingPointZero(RHS))
+    Cmp = DAG.getNode(ARMISD::CMPFP, MVT::Flag, LHS, RHS);
+  else
+    Cmp = DAG.getNode(ARMISD::CMPFPw0, MVT::Flag, LHS);
+  return DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp);
+}
+
+static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG,
+                                const ARMSubtarget *ST) {
+  MVT::ValueType VT = Op.getValueType();
+  SDOperand LHS = Op.getOperand(0);
+  SDOperand RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDOperand TrueVal = Op.getOperand(2);
+  SDOperand FalseVal = Op.getOperand(3);
+
+  if (LHS.getValueType() == MVT::i32) {
+    SDOperand ARMCC;
+    SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb());
+    return DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, ARMCC, Cmp);
+  }
+
+  ARMCC::CondCodes CondCode, CondCode2;
+  if (FPCCToARMCC(CC, CondCode, CondCode2))
+    std::swap(TrueVal, FalseVal);
+
+  SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  SDOperand Cmp = getVFPCmp(LHS, RHS, DAG);
+  SDOperand Result = DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal,
+                                 ARMCC, Cmp);
+  if (CondCode2 != ARMCC::AL) {
+    SDOperand ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
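The constant adjustment in getARMCmp above rests on four identities over integers, e.g. x < C iff x <= C-1: shifting the constant by one while relaxing or tightening the relation can land on an encodable immediate. A standalone restatement of the signed cases (a sketch; the unsigned cases are analogous, and the caller's encodability check keeps the C-1/C+1 arithmetic away from the wrap-around edges):

    #include <utility>

    enum Rel { LT, LE, GT, GE };

    // Returns an equivalent (relation, constant) pair shifted by one.
    std::pair<Rel, int> adjustCmpByOne(Rel r, int c) {
      switch (r) {
      case LT: return {LE, c - 1};   // x <  c  ==  x <= c-1
      case GE: return {GT, c - 1};   // x >= c  ==  x >  c-1
      case LE: return {LT, c + 1};   // x <= c  ==  x <  c+1
      case GT: return {GE, c + 1};   // x >  c  ==  x >= c+1
      }
      return {r, c};
    }

+    // FIXME: Needs another CMP because flag can have but one use.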
+ SDOperand Cmp2 = getVFPCmp(LHS, RHS, DAG); + Result = DAG.getNode(ARMISD::CMOV, VT, Result, TrueVal, ARMCC2, Cmp2); + } + return Result; +} + +static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDOperand Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDOperand LHS = Op.getOperand(2); + SDOperand RHS = Op.getOperand(3); + SDOperand Dest = Op.getOperand(4); + + if (LHS.getValueType() == MVT::i32) { + SDOperand ARMCC; + SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb()); + return DAG.getNode(ARMISD::BRCOND, MVT::Other, Chain, Dest, ARMCC, Cmp); + } + + assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + // Swap the LHS/RHS of the comparison if needed. + std::swap(LHS, RHS); + + SDOperand Cmp = getVFPCmp(LHS, RHS, DAG); + SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDOperand Ops[] = { Chain, Dest, ARMCC, Cmp }; + SDOperand Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 4); + if (CondCode2 != ARMCC::AL) { + ARMCC = DAG.getConstant(CondCode2, MVT::i32); + SDOperand Ops[] = { Res, Dest, ARMCC, Res.getValue(1) }; + Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 4); + } + return Res; +} + +SDOperand ARMTargetLowering::LowerBR_JT(SDOperand Op, SelectionDAG &DAG) { + SDOperand Chain = Op.getOperand(0); + SDOperand Table = Op.getOperand(1); + SDOperand Index = Op.getOperand(2); + + MVT::ValueType PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + SDOperand UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(4, PTy)); + SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table); + bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + Addr = DAG.getLoad(isPIC ? MVT::i32 : PTy, Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + if (isPIC) + Addr = DAG.getNode(ISD::ADD, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, MVT::Other, Chain, Addr, JTI, UId); +} + +static SDOperand LowerFP_TO_INT(SDOperand Op, SelectionDAG &DAG) { + unsigned Opc = + Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI; + Op = DAG.getNode(Opc, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op); +} + +static SDOperand LowerINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { + MVT::ValueType VT = Op.getValueType(); + unsigned Opc = + Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; + + Op = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, VT, Op); +} + +static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { + // Implement fcopysign with a fabs and a conditional fneg. 
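That expansion in scalar form (a sketch): take |x|, compare the sign source against zero, and negate when the compare says "less than" (the CNEG below, predicated on ARMCC::LT). Like the lowering it mirrors, this treats a negative-zero or NaN sign source differently from libm's copysign, which inspects the sign bit itself.

    #include <cmath>

    double copysign_via_cneg(double x, double y) {
      double a = std::fabs(x);       // ISD::FABS
      return (y < 0.0) ? -a : a;     // ARMISD::CNEG under the LT condition
    }
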
+ SDOperand Tmp0 = Op.getOperand(0); + SDOperand Tmp1 = Op.getOperand(1); + MVT::ValueType VT = Op.getValueType(); + MVT::ValueType SrcVT = Tmp1.getValueType(); + SDOperand AbsVal = DAG.getNode(ISD::FABS, VT, Tmp0); + SDOperand Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG); + SDOperand ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, Cmp); +} + +static SDOperand LowerBIT_CONVERT(SDOperand Op, SelectionDAG &DAG) { + // Turn f64->i64 into FMRRD. + assert(Op.getValueType() == MVT::i64 && + Op.getOperand(0).getValueType() == MVT::f64); + + Op = Op.getOperand(0); + SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, DAG.getVTList(MVT::i32, MVT::i32), + &Op, 1); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Cvt, Cvt.getValue(1)); +} + +static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { + // FIXME: All this code is target-independent. Create a new target-indep + // MULHILO node and move this code to the legalizer. + // + assert(Op.getValueType() == MVT::i64 && "Only handles i64 expand right now!"); + + SDOperand LL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + SDOperand RL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1), + DAG.getConstant(0, MVT::i32)); + + const TargetLowering &TL = DAG.getTargetLoweringInfo(); + unsigned LHSSB = TL.ComputeNumSignBits(Op.getOperand(0)); + unsigned RHSSB = TL.ComputeNumSignBits(Op.getOperand(1)); + + SDOperand Lo, Hi; + // Figure out how to lower this multiply. + if (LHSSB >= 33 && RHSSB >= 33) { + // If the input values are both sign extended, we can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL); + Hi = DAG.getNode(ISD::MULHS, MVT::i32, LL, RL); + } else if (LHSSB == 32 && RHSSB == 32 && + TL.MaskedValueIsZero(Op.getOperand(0), 0xFFFFFFFF00000000ULL) && + TL.MaskedValueIsZero(Op.getOperand(1), 0xFFFFFFFF00000000ULL)) { + // If the inputs are zero extended, use mulhu. + Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL); + Hi = DAG.getNode(ISD::MULHU, MVT::i32, LL, RL); + } else { + SDOperand LH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), + DAG.getConstant(1, MVT::i32)); + SDOperand RH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1), + DAG.getConstant(1, MVT::i32)); + + // Lo,Hi = umul LHS, RHS. + SDOperand Ops[] = { LL, RL }; + SDOperand UMul64 = DAG.getNode(ARMISD::MULHILOU, + DAG.getVTList(MVT::i32, MVT::i32), Ops, 2); + Lo = UMul64; + Hi = UMul64.getValue(1); + RH = DAG.getNode(ISD::MUL, MVT::i32, LL, RH); + LH = DAG.getNode(ISD::MUL, MVT::i32, LH, RL); + Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, RH); + Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, LH); + } + + // Merge the pieces into a single i64 value. 
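The general path above is the textbook 32x32 decomposition: one UMULL produces the low word and the raw high word, and the two cross products (taken mod 2^32) are added into the high word. An executable restatement (a sketch, not part of the patch):

    #include <cstdint>

    uint64_t mul64(uint64_t a, uint64_t b) {
      uint32_t ll = (uint32_t)a, lh = (uint32_t)(a >> 32);
      uint32_t rl = (uint32_t)b, rh = (uint32_t)(b >> 32);
      uint64_t lo64 = (uint64_t)ll * rl;            // UMULL: lo and hi words
      uint32_t hi   = (uint32_t)(lo64 >> 32);
      hi += ll * rh;                                // cross products, mod 2^32
      hi += lh * rl;
      return ((uint64_t)hi << 32) | (uint32_t)lo64; // BUILD_PAIR
    }
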
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); +} + +static SDOperand LowerMULHU(SDOperand Op, SelectionDAG &DAG) { + SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + return DAG.getNode(ARMISD::MULHILOU, + DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1); +} + +static SDOperand LowerMULHS(SDOperand Op, SelectionDAG &DAG) { + SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; + return DAG.getNode(ARMISD::MULHILOS, + DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1); +} + +static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + assert(Op.getValueType() == MVT::i64 && + (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) && + "Unknown shift to lower!"); + + // We only lower SRA, SRL of 1 here, all others use generic lowering. + if (!isa<ConstantSDNode>(Op.getOperand(1)) || + cast<ConstantSDNode>(Op.getOperand(1))->getValue() != 1) + return SDOperand(); + + // If we are in thumb mode, we don't have RRX. + if (ST->isThumb()) return SDOperand(); + + // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. + SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), + DAG.getConstant(1, MVT::i32)); + + // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and + // captures the result into a carry flag. + unsigned Opc = Op.getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; + Hi = DAG.getNode(Opc, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + + // The low part is an ARMISD::RRX operand, which shifts the carry in. + Lo = DAG.getNode(ARMISD::RRX, MVT::i32, Lo, Hi.getValue(1)); + + // Merge the pieces into a single i64 value. 
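LowerSRx's expansion, executed in plain C (a sketch): the high word is shifted right by one while its outgoing bit is captured as carry (SRL_FLAG/SRA_FLAG), and RRX rotates that carry into bit 31 of the low word.

    #include <cstdint>

    uint64_t lshr64_by_one(uint64_t v) {
      uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
      uint32_t carry = hi & 1;             // the bit SRL_FLAG saves
      hi >>= 1;                            // srl hi, #1
      lo = (lo >> 1) | (carry << 31);      // RRX: carry enters at bit 31
      return ((uint64_t)hi << 32) | lo;
    }
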
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); +} + +SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: assert(0 && "Don't know how to custom lower this!"); abort(); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::CALL: return LowerCALL(Op, DAG); + case ISD::RET: return LowerRET(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); + case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::MULHU: return LowerMULHU(Op, DAG); + case ISD::MULHS: return LowerMULHS(Op, DAG); + case ISD::SRL: + case ISD::SRA: return LowerSRx(Op, DAG, Subtarget); + case ISD::FORMAL_ARGUMENTS: + return LowerFORMAL_ARGUMENTS(Op, DAG); + } +} + +//===----------------------------------------------------------------------===// +// ARM Scheduler Hooks +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +ARMTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, + MachineBasicBlock *BB) { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case ARM::tMOVCCr: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + ilist<MachineBasicBlock>::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); + BuildMI(BB, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()); + MachineFunction *F = BB->getParent(); + F->getBasicBlockList().insert(It, copy0MBB); + F->getBasicBlockList().insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... 
+    BB = sinkMBB;
+    BuildMI(BB, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+    delete MI;   // The pseudo instruction is gone now.
+    return BB;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressImmediate - Return true if the integer value or
+/// GlobalValue can be used as the offset of the target addressing mode.
+bool ARMTargetLowering::isLegalAddressImmediate(int64_t V) const {
+  // ARM allows a 12-bit immediate field.
+  return V == (V & ((1LL << 12) - 1));
+}
+
+bool ARMTargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
+  return false;
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT,
+                                   bool isSEXTLoad, SDOperand &Base,
+                                   SDOperand &Offset, bool &isInc,
+                                   SelectionDAG &DAG) {
+  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+    return false;
+
+  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+    // AddressingMode 3
+    Base = Ptr->getOperand(0);
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+      int RHSC = (int)RHS->getValue();
+      if (RHSC < 0 && RHSC > -256) {
+        isInc = false;
+        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+        return true;
+      }
+    }
+    isInc = (Ptr->getOpcode() == ISD::ADD);
+    Offset = Ptr->getOperand(1);
+    return true;
+  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+    // AddressingMode 2
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+      int RHSC = (int)RHS->getValue();
+      if (RHSC < 0 && RHSC > -0x1000) {
+        isInc = false;
+        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+        Base = Ptr->getOperand(0);
+        return true;
+      }
+    }
+
+    if (Ptr->getOpcode() == ISD::ADD) {
+      isInc = true;
+      ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+      if (ShOpcVal != ARM_AM::no_shift) {
+        Base = Ptr->getOperand(1);
+        Offset = Ptr->getOperand(0);
+      } else {
+        Base = Ptr->getOperand(0);
+        Offset = Ptr->getOperand(1);
+      }
+      return true;
+    }
+
+    isInc = (Ptr->getOpcode() == ISD::ADD);
+    Base = Ptr->getOperand(0);
+    Offset = Ptr->getOperand(1);
+    return true;
+  }
+
+  // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+  return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+                                             SDOperand &Offset,
+                                             ISD::MemIndexedMode &AM,
+                                             SelectionDAG &DAG) {
+  if (Subtarget->isThumb())
+    return false;
+
+  MVT::ValueType VT;
+  SDOperand Ptr;
+  bool isSEXTLoad = false;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    VT = LD->getLoadedVT();
+    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    VT = ST->getStoredVT();
+  } else
+    return false;
+
+  bool isInc;
+  bool isLegal = getIndexedAddressParts(Ptr.Val, VT, isSEXTLoad, Base, Offset,
+                                        isInc, DAG);
+  if (isLegal) {
+    AM = isInc ? 
+    return true;
+  }
+  return false;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                                   SDOperand &Base,
+                                                   SDOperand &Offset,
+                                                   ISD::MemIndexedMode &AM,
+                                                   SelectionDAG &DAG) {
+  if (Subtarget->isThumb())
+    return false;
+
+  MVT::ValueType VT;
+  SDOperand Ptr;
+  bool isSEXTLoad = false;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    VT = LD->getLoadedVT();
+    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    VT = ST->getStoredVT();
+  } else
+    return false;
+
+  bool isInc;
+  bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                        isInc, DAG);
+  if (isLegal) {
+    AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+    return true;
+  }
+  return false;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+                                                       uint64_t Mask,
+                                                       uint64_t &KnownZero,
+                                                       uint64_t &KnownOne,
+                                                       unsigned Depth) const {
+  KnownZero = 0;
+  KnownOne = 0;
+  switch (Op.getOpcode()) {
+  default: break;
+  case ARMISD::CMOV: {
+    // Bits are known zero/one if known on the LHS and RHS.
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+    if (KnownZero == 0 && KnownOne == 0) return;
+
+    uint64_t KnownZeroRHS, KnownOneRHS;
+    ComputeMaskedBits(Op.getOperand(1), Mask,
+                      KnownZeroRHS, KnownOneRHS, Depth+1);
+    KnownZero &= KnownZeroRHS;
+    KnownOne  &= KnownOneRHS;
+    return;
+  }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//                           ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(char ConstraintLetter) const {
+  switch (ConstraintLetter) {
+  case 'l':
+    return C_RegisterClass;
+  default: return TargetLowering::getConstraintType(ConstraintLetter);
+  }
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                MVT::ValueType VT) const {
+  if (Constraint.size() == 1) {
+    // GCC ARM Constraint Letters
+    switch (Constraint[0]) {
+    case 'l':
+      // FIXME: in thumb mode, 'l' is only low-regs.
+      // FALL THROUGH.
+    case 'r':
+      return std::make_pair(0U, ARM::GPRRegisterClass);
+      break;
+    }
+  }
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  MVT::ValueType VT) const {
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+
+  switch (Constraint[0]) {      // GCC ARM Constraint Letters
+  default: break;
+  case 'l':
+  case 'r':
+    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+                                 ARM::R12, ARM::LR, 0);
+  }
+
+  return std::vector<unsigned>();
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000000..5f7ed65a3e
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,134 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <vector>
+
+namespace llvm {
+  class ARMConstantPoolValue;
+  class ARMSubtarget;
+
+  namespace ARMISD {
+    // ARM Specific DAG Nodes
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
+
+      Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
+                    // TargetExternalSymbol, and TargetGlobalAddress.
+      WrapperCall,  // WrapperCall - Same as Wrapper, but marks the wrapped
+                    // node as a call operand.
+      WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
+
+      CALL,         // Function call.
+      CALL_NOLINK,  // Function call with branch, not branch-and-link.
+      tCALL,        // Thumb function call.
+      BRCOND,       // Conditional branch.
+      BR_JT,        // Jumptable branch.
+      RET_FLAG,     // Return with a flag operand.
+
+      PIC_ADD,      // Add with a PC operand and a PIC label.
+
+      CMP,          // ARM compare instructions.
+      CMPFP,        // ARM VFP compare instruction, sets FPSCR.
+      CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
+      FMSTAT,       // ARM fmstat instruction.
+      CMOV,         // ARM conditional move instructions.
+      CNEG,         // ARM conditional negate instructions.
+
+      FTOSI,        // FP to sint within a FP register.
+      FTOUI,        // FP to uint within a FP register.
+      SITOF,        // sint to FP within a FP register.
+      UITOF,        // uint to FP within a FP register.
+
+      MULHILOU,     // Lo,Hi = umul LHS, RHS.
+      MULHILOS,     // Lo,Hi = smul LHS, RHS.
+
+      SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+      SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+      RRX,          // V = RRX X, Flag     -> srl X, 1 + shift in carry flag.
+
+      FMRRD,        // double to two gprs.
+      FMDRR         // Two gprs to double.
+    };
+  }
+
+  //===--------------------------------------------------------------------===//
+  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+  class ARMTargetLowering : public TargetLowering {
+    int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
+  public:
+    ARMTargetLowering(TargetMachine &TM);
+
+    virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+                                                       MachineBasicBlock *MBB);
+
+    /// isLegalAddressImmediate - Return true if the integer value or
+    /// GlobalValue can be used as the offset of the target addressing mode.
+    virtual bool isLegalAddressImmediate(int64_t V) const;
+    virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+    /// getPreIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if the node's address
+    /// can be legally represented as pre-indexed load / store address.
+    virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+                                           SDOperand &Offset,
+                                           ISD::MemIndexedMode &AM,
+                                           SelectionDAG &DAG);
+
+    /// getPostIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if this node can be
+    /// combined with a load / store to form a post-indexed load / store.
+    virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                            SDOperand &Base, SDOperand &Offset,
+                                            ISD::MemIndexedMode &AM,
+                                            SelectionDAG &DAG);
+
+    virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+                                                uint64_t Mask,
+                                                uint64_t &KnownZero,
+                                                uint64_t &KnownOne,
+                                                unsigned Depth) const;
+    ConstraintType getConstraintType(char ConstraintLetter) const;
+    std::pair<unsigned, const TargetRegisterClass*>
+      getRegForInlineAsmConstraint(const std::string &Constraint,
+                                   MVT::ValueType VT) const;
+    std::vector<unsigned>
+    getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                      MVT::ValueType VT) const;
+  private:
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+ /// + unsigned ARMPCLabelIndex; + + SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); + SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG); + }; +} + +#endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index f99615b572..b5425fec8a 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -14,46 +14,409 @@ #include "ARMInstrInfo.h" #include "ARM.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; -ARMInstrInfo::ARMInstrInfo() +static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, + cl::desc("Enable ARM 2-addr to 3-addr conv")); + +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])), - RI(*this) { + RI(*this, STI) { +} + +unsigned ARMInstrInfo::getDWARF_LABELOpcode() const { + return ARM::DWARF_LABEL; } const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const { - return &ARM::IntRegsRegClass; + return &ARM::GPRRegClass; } /// Return true if the instruction is a register to register move and /// leave the source and dest operands in the passed parameters. /// bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg) const { + unsigned &SrcReg, unsigned &DstReg) const { MachineOpCode oc = MI.getOpcode(); switch (oc) { - case ARM::MOV: { - assert(MI.getNumOperands() == 4 && - MI.getOperand(0).isRegister() && + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVrr: + case ARM::tMOVrr: + assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() && + MI.getOperand(1).isRegister() && "Invalid ARM MOV instruction"); - const MachineOperand &Arg = MI.getOperand(1); - const MachineOperand &Shift = MI.getOperand(2); - if (Arg.isRegister() && Shift.isImmediate() && Shift.getImmedValue() == 0) { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const{ + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFrameIndex() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImmediate() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImmedValue() == 0) { + FrameIndex = MI->getOperand(1).getFrameIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFrameIndex() && + MI->getOperand(2).isImmediate() && + MI->getOperand(2).getImmedValue() == 0) { + FrameIndex = MI->getOperand(1).getFrameIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tLDRspi: + if (MI->getOperand(1).isFrameIndex() && + MI->getOperand(2).isImmediate() && + MI->getOperand(2).getImmedValue() == 0) { + FrameIndex = MI->getOperand(1).getFrameIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI, 
                                           int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::STR:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImmediate() &&
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::FSTD:
+  case ARM::FSTS:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() &&
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
     }
+    break;
+  case ARM::tSTRspi:
+    if (MI->getOperand(1).isFrameIndex() &&
+        MI->getOperand(2).isImmediate() &&
+        MI->getOperand(2).getImmedValue() == 0) {
+      FrameIndex = MI->getOperand(1).getFrameIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+  switch (Opc) {
+  default: break;
+  case ARM::LDR_PRE:
+  case ARM::LDR_POST:
+    return ARM::LDR;
+  case ARM::LDRH_PRE:
+  case ARM::LDRH_POST:
+    return ARM::LDRH;
+  case ARM::LDRB_PRE:
+  case ARM::LDRB_POST:
+    return ARM::LDRB;
+  case ARM::LDRSH_PRE:
+  case ARM::LDRSH_POST:
+    return ARM::LDRSH;
+  case ARM::LDRSB_PRE:
+  case ARM::LDRSB_POST:
+    return ARM::LDRSB;
+  case ARM::STR_PRE:
+  case ARM::STR_POST:
+    return ARM::STR;
+  case ARM::STRH_PRE:
+  case ARM::STRH_POST:
+    return ARM::STRH;
+  case ARM::STRB_PRE:
+  case ARM::STRB_POST:
+    return ARM::STRB;
   }
-  return false;
+  return 0;
 }
 
-void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
-                                MachineBasicBlock *FBB,
-                                const std::vector<MachineOperand> &Cond)const{
-  // Can only insert uncond branches so far.
-  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, get(ARM::b)).addMBB(TBB);
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+                                    MachineBasicBlock::iterator &MBBI,
+                                    LiveVariables &LV) const {
+  if (!EnableARM3Addr)
+    return NULL;
+
+  MachineInstr *MI = MBBI;
+  unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+  bool isPre = false;
+  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+  default: return NULL;
+  case ARMII::IndexModePre:
+    isPre = true;
+    break;
+  case ARMII::IndexModePost:
+    break;
+  }
+
+  // Try splitting an indexed load / store into an un-indexed one plus an
+  // add/sub operation.
+  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+  if (MemOpc == 0)
+    return NULL;
+
+  MachineInstr *UpdateMI = NULL;
+  MachineInstr *MemMI = NULL;
+  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+  unsigned NumOps = MI->getNumOperands();
+  bool isLoad = (MI->getInstrDescriptor()->Flags & M_LOAD_FLAG) != 0;
+  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+  const MachineOperand &Base = MI->getOperand(2);
+  const MachineOperand &Offset = MI->getOperand(NumOps-2);
+  unsigned WBReg = WB.getReg();
+  unsigned BaseReg = Base.getReg();
+  unsigned OffReg = Offset.getReg();
+  unsigned OffImm = MI->getOperand(NumOps-1).getImm();
+  switch (AddrMode) {
+  default:
+    assert(false && "Unknown indexed op!");
+    return NULL;
+  case ARMII::AddrMode2: {
+    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+    if (OffReg == 0) {
+      int SOImmVal = ARM_AM::getSOImmVal(Amt);
+      if (SOImmVal == -1)
+        // Can't encode it in a so_imm operand. This transformation will
+        // add more than 1 instruction. Abandon!
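+        // (For instance, an AddrMode2 offset of 0xfff is a legal ldr/str
+        // immediate, but it is not an 8-bit value rotated by an even amount,
+        // so ARM_AM::getSOImmVal returns -1 and no single ADDri/SUBri can
+        // perform the base update.)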
+ return NULL; + UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(SOImmVal); + } else if (Amt != 0) { + ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); + unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); + UpdateMI = BuildMI(get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc); + } else + UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg); + break; + } + case ARMII::AddrMode3 : { + bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM3Offset(OffImm); + if (OffReg == 0) + // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. + UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt); + else + UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg); + break; + } + } + + std::vector<MachineInstr*> NewMIs; + if (isPre) { + if (isLoad) + MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg()) + .addReg(WBReg).addReg(0).addImm(0); + else + MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(WBReg).addReg(0).addImm(0); + NewMIs.push_back(MemMI); + NewMIs.push_back(UpdateMI); + } else { + if (isLoad) + MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg()) + .addReg(BaseReg).addReg(0).addImm(0); + else + MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(BaseReg).addReg(0).addImm(0); + if (WB.isDead()) + UpdateMI->getOperand(0).setIsDead(); + NewMIs.push_back(UpdateMI); + NewMIs.push_back(MemMI); + } + + // Transfer LiveVariables states, kill / dead info. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegister() && MO.getReg() && + MRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); + if (MO.isDef()) { + MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; + if (MO.isDead()) + LV.addVirtualRegisterDead(Reg, NewMI); + // Update the defining instruction. + if (VI.DefInst == MI) + VI.DefInst = NewMI; + } + if (MO.isUse() && MO.isKill()) { + for (unsigned j = 0; j < 2; ++j) { + // Look at the two new MI's in reverse order. + MachineInstr *NewMI = NewMIs[j]; + MachineOperand *NMO = NewMI->findRegisterUseOperand(Reg); + if (!NMO) + continue; + LV.addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + std::vector<MachineOperand> &Cond) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode())) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode())) { + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMachineBasicBlock(); + return false; + } + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. 
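+      // e.g. a block ending in "bcc LBB_1" that falls through to the next
+      // block: TBB becomes LBB_1 and Cond holds the single condition-code
+      // operand (block name illustrative).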
+      TBB = LastInst->getOperand(0).getMachineBasicBlock();
+      Cond.push_back(LastInst->getOperand(1));
+      return false;
+    }
+    return true;  // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isTerminatorInstr((--I)->getOpcode()))
+    return true;
+
+  // If the block ends with ARM::B/ARM::tB and an ARM::Bcc/ARM::tBcc, handle it.
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+  if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+    TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+    Cond.push_back(SecondLastInst->getOperand(1));
+    FBB = LastInst->getOperand(0).getMachineBasicBlock();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+
+void ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return;
+  --I;
+  if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+    return;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return;
+  --I;
+  if (I->getOpcode() != BccOpc)
+    return;
+
+  // Remove the branch.
+  I->eraseFromParent();
+}
+
+void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const std::vector<MachineOperand> &Cond) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "ARM branch conditions have one component!");
+
+  if (FBB == 0) {
+    if (Cond.empty()) // Unconditional branch?
+      BuildMI(&MBB, get(BOpc)).addMBB(TBB);
+    else
+      BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+    return;
+  }
+
+  // Two-way conditional branch.
+  BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+  BuildMI(&MBB, get(BOpc)).addMBB(FBB);
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+  if (MBB.empty()) return false;
+
+  switch (MBB.back().getOpcode()) {
+  case ARM::B:
+  case ARM::tB:       // Uncond branch.
+  case ARM::BR_JTr:   // Jumptable branch.
+  case ARM::BR_JTm:   // Jumptable branch through mem.
+  case ARM::BR_JTadd: // Jumptable branch add to pc.
+    return true;
+  default: return false;
+  }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+  return false;
+}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 0621c70d14..0208121f14 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information --------------*- C++ -*-===//
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -19,11 +19,56 @@
 #include "ARMRegisterInfo.h"
 
 namespace llvm {
+  class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
  enum {
    //===------------------------------------------------------------------===//
    // Instruction Flags.

    //===------------------------------------------------------------------===//
    // This four-bit field describes the addressing mode used. Zero is unused
    // so that we can tell if we forgot to set a value.

    AddrModeMask  = 0xf,
    AddrMode1     = 1,
    AddrMode2     = 2,
    AddrMode3     = 3,
    AddrMode4     = 4,
    AddrMode5     = 5,
    AddrModeT1    = 6,
    AddrModeT2    = 7,
    AddrModeT4    = 8,
    AddrModeTs    = 9,   // i8 * 4 for pc and sp relative data

    // Size* - Flags to keep track of the size of an instruction.
    SizeShift     = 4,
    SizeMask      = 7 << SizeShift,
    SizeSpecial   = 1,   // 0 byte pseudo or special case.
    Size8Bytes    = 2,
    Size4Bytes    = 3,
    Size2Bytes    = 4,

    // IndexMode - Unindexed, pre-indexed, or post-indexed. Only valid for
    // load and store ops
    IndexModeShift = 7,
    IndexModeMask  = 3 << IndexModeShift,
    IndexModePre   = 1,
    IndexModePost  = 2,

    // Opcode
    OpcodeShift   = 9,
    OpcodeMask    = 0xf << OpcodeShift
  };
}
 
 class ARMInstrInfo : public TargetInstrInfo {
   const ARMRegisterInfo RI;
 public:
-  ARMInstrInfo();
+  ARMInstrInfo(const ARMSubtarget &STI);
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
@@ -35,15 +80,33 @@ public:
   /// This is used for addressing modes.
   virtual const TargetRegisterClass *getPointerRegClass() const;
 
+  /// getDWARF_LABELOpcode - Return the opcode of the target's DWARF_LABEL
+  /// instruction if it has one. This is used by codegen passes that update
+  /// DWARF line number info as they modify the code.
+  virtual unsigned getDWARF_LABELOpcode() const;
+
   /// Return true if the instruction is a register to register move and
   /// leave the source and dest operands in the passed parameters.
   ///
   virtual bool isMoveInstr(const MachineInstr &MI,
                            unsigned &SrcReg, unsigned &DstReg) const;
+  virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+  virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+                                              MachineBasicBlock::iterator &MBBI,
+                                              LiveVariables &LV) const;
+
+  // Branch analysis.
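+  // A false return value means the branch was analyzed; e.g. for a block
+  // ending in "bcc LBB_1 / b LBB_2" (names illustrative), AnalyzeBranch sets
+  // TBB = LBB_1, FBB = LBB_2 and records the condition operand in Cond.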
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + std::vector<MachineOperand> &Cond) const; + virtual void RemoveBranch(MachineBasicBlock &MBB) const; virtual void InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const std::vector<MachineOperand> &Cond) const; + virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const; + virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index beed8abef9..bde81bcfe8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.td - Target Description for ARM Target ----------------===// +//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -12,351 +12,1184 @@ // //===----------------------------------------------------------------------===// -// Address operands -def op_addr_mode1 : Operand<iPTR> { - let PrintMethod = "printAddrMode1"; - let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm); +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +// Node definitions. +def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperCall : SDNode<"ARMISD::WrapperCall", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq, + [SDNPHasChain, SDNPOutFlag]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeq, + [SDNPHasChain, SDNPOutFlag]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTRet, + [SDNPHasChain, SDNPOptInFlag]>; + +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInFlag]>; +def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, + [SDNPInFlag]>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. 
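+// A predicate gates instruction selection; e.g. a definition carrying
+// Requires<[IsARM, HasV5T]> (such as BLX below) is only selected when
+// !Subtarget->isThumb() and Subtarget->hasV5TOps() both hold.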
+// +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; + +//===----------------------------------------------------------------------===// +// ARM Flag Definitions. + +class RegConstraint<string C> { + string Constraints = C; } -def op_addr_mode2 : Operand<iPTR> { - let PrintMethod = "printAddrMode2"; - let MIOperandInfo = (ops ptr_rc, i32imm); +//===----------------------------------------------------------------------===// +// ARM specific transformation functions and pattern fragments. +// + +// so_imm_XFORM - Return a so_imm value packed into the format described for +// so_imm def below. +def so_imm_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getValue()), + MVT::i32); +}]>; + +// so_imm_neg_XFORM - Return a so_imm value packed into the format described for +// so_imm_neg def below. +def so_imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getValue()), + MVT::i32); +}]>; + +// so_imm_not_XFORM - Return a so_imm value packed into the format described for +// so_imm_not def below. +def so_imm_not_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getValue()), + MVT::i32); +}]>; + +// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24. +def rot_imm : PatLeaf<(i32 imm), [{ + int32_t v = (int32_t)N->getValue(); + return v == 8 || v == 16 || v == 24; +}]>; + +/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. +def imm1_15 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getValue() >= 1 && (int32_t)N->getValue() < 16; +}]>; + +/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. +def imm16_31 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getValue() >= 16 && (int32_t)N->getValue() < 32; +}]>; + +def so_imm_neg : + PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getValue()) != -1; }], + so_imm_neg_XFORM>; + +def so_imm_not : + PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(int)N->getValue()) != -1; }], + so_imm_not_XFORM>; + +// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. +def sext_16_node : PatLeaf<(i32 GPR:$a), [{ + return TLI.ComputeNumSignBits(SDOperand(N,0)) >= 17; +}]>; + + +// Break so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to +// get the first/second pieces. +def so_imm2part : PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getValue()); +}]>; + +def so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + +def so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + + + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// Branch target. +def brtarget : Operand<OtherVT>; + +// Operand for printing out a condition code. +def CCOp : Operand<i32> { + let PrintMethod = "printCCOperand"; +} + +// A list of registers separated by comma. Used by load/store multiple. 
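+// (It prints in braces, e.g. the "{r4, r5, lr}" of "ldmfd sp!, {r4, r5, lr}".)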
+def reglist : Operand<i32> { + let PrintMethod = "printRegisterList"; +} + +// An operand for the CONSTPOOL_ENTRY pseudo-instruction. +def cpinst_operand : Operand<i32> { + let PrintMethod = "printCPInstOperand"; +} + +def jtblock_operand : Operand<i32> { + let PrintMethod = "printJTBlockOperand"; +} + +// Local PC labels. +def pclabel : Operand<i32> { + let PrintMethod = "printPCLabel"; +} + +// shifter_operand operands: so_reg and so_imm. +def so_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectShifterOperandReg", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSORegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} + +// so_imm - Match a 32-bit shifter_operand immediate operand, which is an +// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are +// represented in the imm field in the same 12-bit form that they are encoded +// into so_imm instructions: the 8-bit immediate is the least significant bits +// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. +def so_imm : Operand<i32>, + PatLeaf<(imm), + [{ return ARM_AM::getSOImmVal(N->getValue()) != -1; }], + so_imm_XFORM> { + let PrintMethod = "printSOImmOperand"; } -def op_addr_mode5 : Operand<iPTR> { - let PrintMethod = "printAddrMode5"; - let MIOperandInfo = (ops ptr_rc, i32imm); + +// Define ARM specific addressing modes. + +// addrmode2 := reg +/- reg shop imm +// addrmode2 := reg +/- imm12 +// +def addrmode2 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode2", []> { + let PrintMethod = "printAddrMode2Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } -// Define ARM specific addressing mode. -//Addressing Mode 1: data processing operands -def addr_mode1 : ComplexPattern<iPTR, 3, "SelectAddrMode1", [imm, sra, shl, srl], - []>; +def am2offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> { + let PrintMethod = "printAddrMode2OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} -//Addressing Mode 2: Load and Store Word or Unsigned Byte -def addr_mode2 : ComplexPattern<iPTR, 2, "SelectAddrMode2", [], []>; +// addrmode3 := reg +/- reg +// addrmode3 := reg +/- imm8 +// +def addrmode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { + let PrintMethod = "printAddrMode3Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} -//Addressing Mode 5: VFP load/store -def addr_mode5 : ComplexPattern<iPTR, 2, "SelectAddrMode5", [], []>; +def am3offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> { + let PrintMethod = "printAddrMode3OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode4 := reg, <mode|W> +// +def addrmode4 : Operand<i32>, + ComplexPattern<i32, 2, "", []> { + let PrintMethod = "printAddrMode4Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode5 := reg +/- imm8*4 +// +def addrmode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { + let PrintMethod = "printAddrMode5Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmodepc := pc + reg +// +def addrmodepc : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrModePC", []> { + let PrintMethod = "printAddrModePCOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} //===----------------------------------------------------------------------===// -// Instruction Class Templates +// ARM Instruction flags. These need to match ARMInstrInfo.h. +// + +// Addressing mode. 
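+// e.g. LDR below carries AddrMode2, so its address operand covers the
+// "reg +/- reg shop imm" and "reg +/- imm12" forms described above.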
+class AddrMode<bits<4> val> { + bits<4> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrModeT1 : AddrMode<6>; +def AddrModeT2 : AddrMode<7>; +def AddrModeT4 : AddrMode<8>; +def AddrModeTs : AddrMode<9>; + +// Instruction size. +class SizeFlagVal<bits<3> val> { + bits<3> Value = val; +} +def SizeInvalid : SizeFlagVal<0>; // Unset. +def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. +def Size8Bytes : SizeFlagVal<2>; +def Size4Bytes : SizeFlagVal<3>; +def Size2Bytes : SizeFlagVal<4>; + +// Load / store index mode. +class IndexMode<bits<2> val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; + //===----------------------------------------------------------------------===// -class InstARM<dag ops, string asmstr, list<dag> pattern> : Instruction { +// ARM Instruction templates. +// + +// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. +class ARMPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM]; +} +class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV6]; +} + + +class InstARM<bits<4> opcod, AddrMode am, SizeFlagVal sz, IndexMode im, + dag ops, string asmstr, string cstr> + : Instruction { let Namespace = "ARM"; + bits<4> Opcode = opcod; + AddrMode AM = am; + bits<4> AddrModeBits = AM.Value; + + SizeFlagVal SZ = sz; + bits<3> SizeFlag = SZ.Value; + + IndexMode IM = im; + bits<2> IndexModeBits = IM.Value; + dag OperandList = ops; let AsmString = asmstr; + let Constraints = cstr; +} + +class PseudoInst<dag ops, string asm, list<dag> pattern> + : InstARM<0, AddrModeNone, SizeSpecial, IndexModeNone, ops, asm, ""> { + let Pattern = pattern; +} + +class I<dag ops, AddrMode am, SizeFlagVal sz, IndexMode im, + string asm, string cstr, list<dag> pattern> + // FIXME: Set all opcodes to 0 for now. 
+ : InstARM<0, am, sz, im, ops, asm, cstr> { let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; } -class IntBinOp<string OpcStr, SDNode OpNode> : - InstARM<(ops IntRegs:$dst, IntRegs:$a, IntRegs:$b), - !strconcat(OpcStr, " $dst, $a, $b"), - [(set IntRegs:$dst, (OpNode IntRegs:$a, IntRegs:$b))]>; +class AI<dag ops, string asm, list<dag> pattern> + : I<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern>; +class AI1<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode1, Size4Bytes, IndexModeNone, asm, "", pattern>; +class AI2<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode2, Size4Bytes, IndexModeNone, asm, "", pattern>; +class AI3<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode3, Size4Bytes, IndexModeNone, asm, "", pattern>; +class AI4<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode4, Size4Bytes, IndexModeNone, asm, "", pattern>; +class AIx2<dag ops, string asm, list<dag> pattern> + : I<ops, AddrModeNone, Size8Bytes, IndexModeNone, asm, "", pattern>; + +// Pre-indexed ops +class AI2pr<dag ops, string asm, string cstr, list<dag> pattern> + : I<ops, AddrMode2, Size4Bytes, IndexModePre, asm, cstr, pattern>; +class AI3pr<dag ops, string asm, string cstr, list<dag> pattern> + : I<ops, AddrMode3, Size4Bytes, IndexModePre, asm, cstr, pattern>; + +// Post-indexed ops +class AI2po<dag ops, string asm, string cstr, list<dag> pattern> + : I<ops, AddrMode2, Size4Bytes, IndexModePost, asm, cstr, pattern>; +class AI3po<dag ops, string asm, string cstr, list<dag> pattern> + : I<ops, AddrMode3, Size4Bytes, IndexModePost, asm, cstr, pattern>; + +// BR_JT instructions +class JTI<dag ops, string asm, list<dag> pattern> + : I<ops, AddrModeNone, SizeSpecial, IndexModeNone, asm, "", pattern>; +class JTI1<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode1, SizeSpecial, IndexModeNone, asm, "", pattern>; +class JTI2<dag ops, string asm, list<dag> pattern> + : I<ops, AddrMode2, SizeSpecial, IndexModeNone, asm, "", pattern>; + + +class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; + + +/// AI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a +/// binop that produces a value. +multiclass AI1_bin_irs<string opc, PatFrag opnode> { + def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AI1<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} -class FPBinOp<string OpcStr, SDNode OpNode> : - InstARM<(ops FPRegs:$dst, FPRegs:$a, FPRegs:$b), - !strconcat(OpcStr, " $dst, $a, $b"), - [(set FPRegs:$dst, (OpNode FPRegs:$a, FPRegs:$b))]>; +/// AI1_bin0_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns. +/// Similar to AI1_bin_irs except the instruction does not produce a result. 
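+/// e.g. "defm XYZ : AI1_bin0_irs<...>" (name illustrative) creates XYZri,
+/// XYZrr and XYZrs variants, typically cmp/tst-style ops whose only result
+/// is the condition flags.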
+multiclass AI1_bin0_irs<string opc, PatFrag opnode> { + def ri : AI1<(ops GPR:$a, so_imm:$b), + !strconcat(opc, " $a, $b"), + [(opnode GPR:$a, so_imm:$b)]>; + def rr : AI1<(ops GPR:$a, GPR:$b), + !strconcat(opc, " $a, $b"), + [(opnode GPR:$a, GPR:$b)]>; + def rs : AI1<(ops GPR:$a, so_reg:$b), + !strconcat(opc, " $a, $b"), + [(opnode GPR:$a, so_reg:$b)]>; +} -class DFPBinOp<string OpcStr, SDNode OpNode> : - InstARM<(ops DFPRegs:$dst, DFPRegs:$a, DFPRegs:$b), - !strconcat(OpcStr, " $dst, $a, $b"), - [(set DFPRegs:$dst, (OpNode DFPRegs:$a, DFPRegs:$b))]>; +/// AI1_bin_is - Defines a set of (op r, {so_imm|so_reg}) patterns for a binop. +multiclass AI1_bin_is<string opc, PatFrag opnode> { + def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} -class FPUnaryOp<string OpcStr, SDNode OpNode> : - InstARM<(ops FPRegs:$dst, FPRegs:$src), - !strconcat(OpcStr, " $dst, $src"), - [(set FPRegs:$dst, (OpNode FPRegs:$src))]>; +/// AI1_unary_irs - Defines a set of (op {so_imm|r|so_reg}) patterns for unary +/// ops. +multiclass AI1_unary_irs<string opc, PatFrag opnode> { + def i : AI1<(ops GPR:$dst, so_imm:$a), + !strconcat(opc, " $dst, $a"), + [(set GPR:$dst, (opnode so_imm:$a))]>; + def r : AI1<(ops GPR:$dst, GPR:$a), + !strconcat(opc, " $dst, $a"), + [(set GPR:$dst, (opnode GPR:$a))]>; + def s : AI1<(ops GPR:$dst, so_reg:$a), + !strconcat(opc, " $dst, $a"), + [(set GPR:$dst, (opnode so_reg:$a))]>; +} -class DFPUnaryOp<string OpcStr, SDNode OpNode> : - InstARM<(ops DFPRegs:$dst, DFPRegs:$src), - !strconcat(OpcStr, " $dst, $src"), - [(set DFPRegs:$dst, (OpNode DFPRegs:$src))]>; +/// AI_unary_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass AI_unary_rrot<string opc, PatFrag opnode> { + def r : AI<(ops GPR:$dst, GPR:$Src), + !strconcat(opc, " $dst, $Src"), + [(set GPR:$dst, (opnode GPR:$Src))]>, Requires<[IsARM, HasV6]>; + def r_rot : AI<(ops GPR:$dst, GPR:$Src, i32imm:$rot), + !strconcat(opc, " $dst, $Src, ror $rot"), + [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]>; +} + +/// AI_bin_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass AI_bin_rrot<string opc, PatFrag opnode> { + def rr : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS), + !strconcat(opc, " $dst, $LHS, $RHS"), + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[IsARM, HasV6]>; + def rr_rot : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS, i32imm:$rot), + !strconcat(opc, " $dst, $LHS, $RHS, ror $rot"), + [(set GPR:$dst, (opnode GPR:$LHS, + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]>; +} -class Addr1BinOp<string OpcStr, SDNode OpNode> : - InstARM<(ops IntRegs:$dst, IntRegs:$a, op_addr_mode1:$b), - !strconcat(OpcStr, " $dst, $a, $b"), - [(set IntRegs:$dst, (OpNode IntRegs:$a, addr_mode1:$b))]>; //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// -def brtarget : Operand<OtherVT>; +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. 
+//
+def IMPLICIT_DEF_GPR :
+PseudoInst<(ops GPR:$rD),
+           "@ IMPLICIT_DEF_GPR $rD",
+           [(set GPR:$rD, (undef))]>;
+
+
+/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in
+/// the function. The first operand is the ID# for this instruction, the second
+/// is the index into the MachineConstantPool that this is, the third is the
+/// size in bytes of this constant pool entry.
+def CONSTPOOL_ENTRY :
+PseudoInst<(ops cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size),
+           "${instid:label} ${cpidx:cpentry}", []>;
+
+def ADJCALLSTACKUP :
+PseudoInst<(ops i32imm:$amt),
+           "@ ADJCALLSTACKUP $amt",
+           [(ARMcallseq_end imm:$amt)]>, Imp<[SP],[SP]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(ops i32imm:$amt),
+           "@ ADJCALLSTACKDOWN $amt",
+           [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>;
+
+def DWARF_LOC :
+PseudoInst<(ops i32imm:$line, i32imm:$col, i32imm:$file),
+           ".loc $file, $line, $col",
+           [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+def DWARF_LABEL :
+PseudoInst<(ops i32imm:$id),
+           "\nLdebug_loc${id:no_hash}:",
+           [(dwarf_label (i32 imm:$id))]>;
+
+def PICADD : AI1<(ops GPR:$dst, GPR:$a, pclabel:$cp),
+                 "\n$cp:\n\tadd $dst, pc, $a",
+                 [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+let AddedComplexity = 10 in
+def PICLD : AI2<(ops GPR:$dst, addrmodepc:$addr),
+                "\n${addr:label}:\n\tldr $dst, $addr",
+                [(set GPR:$dst, (load addrmodepc:$addr))]>;
 
-// Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
-  def CCOp : Operand<i32>;
 
+//===----------------------------------------------------------------------===//
+//  Control Flow Instructions.
+//
 
-def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
-                           [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeq,
-                           [SDNPHasChain, SDNPOutFlag]>;
 
+let isReturn = 1, isTerminator = 1 in
+  def BX_RET : AI<(ops), "bx lr", [(ARMretflag)]>;
+
+// FIXME: remove when we have a way of marking a MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in + def LDM_RET : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops), + "ldm${addr:submode} $addr, $dst1", + []>; + +let isCall = 1, noResults = 1, + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7] in { + def BL : AI<(ops i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>; + // ARMv5T and above + def BLX : AI<(ops GPR:$dst, variable_ops), + "blx $dst", + [(ARMcall GPR:$dst)]>, Requires<[IsARM, HasV5T]>; + // ARMv4T + def BX : AIx2<(ops GPR:$dst, variable_ops), + "mov lr, pc\n\tbx $dst", + [(ARMcall_nolink GPR:$dst)]>; +} -def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def retflag : SDNode<"ARMISD::RET_FLAG", SDTRet, - [SDNPHasChain, SDNPOptInFlag]>; - -def SDTarmselect : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; -def armselect : SDNode<"ARMISD::SELECT", SDTarmselect, [SDNPInFlag, SDNPOutFlag]>; - -def SDTarmfmstat : SDTypeProfile<0, 0, []>; -def armfmstat : SDNode<"ARMISD::FMSTAT", SDTarmfmstat, [SDNPInFlag, SDNPOutFlag]>; - -def SDTarmbr : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; -def armbr : SDNode<"ARMISD::BR", SDTarmbr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; - -def SDTVoidBinOp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; -def armcmp : SDNode<"ARMISD::CMP", SDTVoidBinOp, [SDNPOutFlag]>; - -def armfsitos : SDNode<"ARMISD::FSITOS", SDTUnaryOp>; -def armftosis : SDNode<"ARMISD::FTOSIS", SDTUnaryOp>; -def armfsitod : SDNode<"ARMISD::FSITOD", SDTUnaryOp>; -def armftosid : SDNode<"ARMISD::FTOSID", SDTUnaryOp>; -def armfuitos : SDNode<"ARMISD::FUITOS", SDTUnaryOp>; -def armftouis : SDNode<"ARMISD::FTOUIS", SDTUnaryOp>; -def armfuitod : SDNode<"ARMISD::FUITOD", SDTUnaryOp>; -def armftouid : SDNode<"ARMISD::FTOUID", SDTUnaryOp>; - -def SDTarmfmrrd : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisFP<2>]>; -def armfmrrd : SDNode<"ARMISD::FMRRD", SDTarmfmrrd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; - -def SDTarmfmdrr : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>; -def armfmdrr : SDNode<"ARMISD::FMDRR", SDTarmfmdrr, []>; +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def B : AI<(ops brtarget:$dst), "b $dst", + [(br bb:$dst)]>; + + def BR_JTr : JTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id), + "mov pc, $dst \n$jt", + [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>; + def BR_JTm : JTI2<(ops addrmode2:$dst, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $dst \n$jt", + [(ARMbrjt (i32 (load addrmode2:$dst)), tjumptable:$jt, + imm:$id)]>; + def BR_JTadd : JTI1<(ops GPR:$dst, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $dst, $idx \n$jt", + [(ARMbrjt (add GPR:$dst, GPR:$idx), tjumptable:$jt, + imm:$id)]>; +} -def ADJCALLSTACKUP : InstARM<(ops i32imm:$amt), - "!ADJCALLSTACKUP $amt", - [(callseq_end imm:$amt)]>, Imp<[R13],[R13]>; - -def ADJCALLSTACKDOWN : InstARM<(ops i32imm:$amt), - "!ADJCALLSTACKDOWN $amt", - [(callseq_start imm:$amt)]>, Imp<[R13],[R13]>; - -def IMPLICIT_DEF_Int : InstARM<(ops IntRegs:$dst), - "@IMPLICIT_DEF $dst", - [(set IntRegs:$dst, (undef))]>; -def IMPLICIT_DEF_FP : InstARM<(ops FPRegs:$dst), "@IMPLICIT_DEF $dst", - [(set FPRegs:$dst, (undef))]>; -def IMPLICIT_DEF_DFP : InstARM<(ops DFPRegs:$dst), "@IMPLICIT_DEF $dst", - [(set DFPRegs:$dst, (undef))]>; +let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in + def Bcc : AI<(ops brtarget:$dst, CCOp:$cc), "b$cc 
$dst", + [(ARMbrcond bb:$dst, imm:$cc)]>; -let isReturn = 1 in { - def bx: InstARM<(ops), "bx r14", [(retflag)]>; -} +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// -let noResults = 1, Defs = [R0, R1, R2, R3, R14] in { - def bl: InstARM<(ops i32imm:$func, variable_ops), "bl $func", []>; - def blx : InstARM<(ops IntRegs:$func, variable_ops), "blx $func", [(ARMcall IntRegs:$func)]>; -} +// Load +let isLoad = 1 in { +def LDR : AI2<(ops GPR:$dst, addrmode2:$addr), + "ldr $dst, $addr", + [(set GPR:$dst, (load addrmode2:$addr))]>; + +// Loads with zero extension +def LDRH : AI3<(ops GPR:$dst, addrmode3:$addr), + "ldrh $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; + +def LDRB : AI2<(ops GPR:$dst, addrmode2:$addr), + "ldrb $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3<(ops GPR:$dst, addrmode3:$addr), + "ldrsh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3<(ops GPR:$dst, addrmode3:$addr), + "ldrsb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; + +// Load doubleword +def LDRD : AI3<(ops GPR:$dst, addrmode3:$addr), + "ldrd $dst, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed loads +def LDR_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr), + "ldr $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDR_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base, am2offset:$offset), + "ldr $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr), + "ldrh $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRH_POST : AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset), + "ldrh $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRB_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr), + "ldrb $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRB_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am2offset:$offset), + "ldrb $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr), + "ldrsh $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSH_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset), + "ldrsh $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSB_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr), + "ldrsb $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSB_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset), + "ldrsb $dst, [$base], $offset", "$base = $base_wb", []>; +} // isLoad + +// Store +let isStore = 1 in { +def STR : AI2<(ops GPR:$src, addrmode2:$addr), + "str $src, $addr", + [(store GPR:$src, addrmode2:$addr)]>; + +// Stores with truncate +def STRH : AI3<(ops GPR:$src, addrmode3:$addr), + "strh $src, $addr", + [(truncstorei16 GPR:$src, addrmode3:$addr)]>; + +def STRB : AI2<(ops GPR:$src, addrmode2:$addr), + "strb $src, $addr", + [(truncstorei8 GPR:$src, addrmode2:$addr)]>; + +// Store doubleword +def STRD : AI3<(ops GPR:$src, addrmode3:$addr), + "strd $src, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed stores +def STR_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base, am2offset:$offset), + "str $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STR_POST : AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset), + "str $src, [$base], $offset", "$base = 
$base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STRH_PRE : AI3pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset), + "strh $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; + +def STRH_POST: AI3po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset), + "strh $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti16 GPR:$src, + GPR:$base, am3offset:$offset))]>; + +def STRB_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset), + "strb $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +def STRB_POST: AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset), + "strb $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; +} // isStore -def LDR : InstARM<(ops IntRegs:$dst, op_addr_mode2:$addr), - "ldr $dst, $addr", - [(set IntRegs:$dst, (load addr_mode2:$addr))]>; +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// -def LDRB : InstARM<(ops IntRegs:$dst, IntRegs:$addr), - "ldrb $dst, [$addr]", - [(set IntRegs:$dst, (zextloadi8 IntRegs:$addr))]>; +let isLoad = 1 in +def LDM : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops), + "ldm${addr:submode} $addr, $dst1", + []>; -def LDRSB : InstARM<(ops IntRegs:$dst, IntRegs:$addr), - "ldrsb $dst, [$addr]", - [(set IntRegs:$dst, (sextloadi8 IntRegs:$addr))]>; +let isStore = 1 in +def STM : AI4<(ops addrmode4:$addr, reglist:$src1, variable_ops), + "stm${addr:submode} $addr, $src1", + []>; -def LDRH : InstARM<(ops IntRegs:$dst, IntRegs:$addr), - "ldrh $dst, [$addr]", - [(set IntRegs:$dst, (zextloadi16 IntRegs:$addr))]>; +//===----------------------------------------------------------------------===// +// Move Instructions. +// -def LDRSH : InstARM<(ops IntRegs:$dst, IntRegs:$addr), - "ldrsh $dst, [$addr]", - [(set IntRegs:$dst, (sextloadi16 IntRegs:$addr))]>; +def MOVrr : AI1<(ops GPR:$dst, GPR:$src), + "mov $dst, $src", []>; +def MOVrs : AI1<(ops GPR:$dst, so_reg:$src), + "mov $dst, $src", [(set GPR:$dst, so_reg:$src)]>; +def MOVri : AI1<(ops GPR:$dst, so_imm:$src), + "mov $dst, $src", [(set GPR:$dst, so_imm:$src)]>; -def STR : InstARM<(ops IntRegs:$src, op_addr_mode2:$addr), - "str $src, $addr", - [(store IntRegs:$src, addr_mode2:$addr)]>; +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. -def STRB : InstARM<(ops IntRegs:$src, IntRegs:$addr), - "strb $src, [$addr]", - [(truncstorei8 IntRegs:$src, IntRegs:$addr)]>; +def MOVsrl_flag : AI1<(ops GPR:$dst, GPR:$src), + "movs $dst, $src, lsr #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>; +def MOVsra_flag : AI1<(ops GPR:$dst, GPR:$src), + "movs $dst, $src, asr #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>; +def MOVrrx : AI1<(ops GPR:$dst, GPR:$src), + "mov $dst, $src, rrx", + [(set GPR:$dst, (ARMrrx GPR:$src))]>; -def STRH : InstARM<(ops IntRegs:$src, IntRegs:$addr), - "strh $src, [$addr]", - [(truncstorei16 IntRegs:$src, IntRegs:$addr)]>; -def MOV : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src), - "mov $dst, $src", [(set IntRegs:$dst, addr_mode1:$src)]>; +//===----------------------------------------------------------------------===// +// Extend Instructions. 
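+// Each defm below expands into an "r" and an "r_rot" def; e.g. SXTB yields
+// SXTBr ("sxtb $dst, $Src") and SXTBr_rot ("sxtb $dst, $Src, ror $rot").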
+// -def MVN : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src), - "mvn $dst, $src", [(set IntRegs:$dst, (not addr_mode1:$src))]>; +// Sign extenders -def ADD : Addr1BinOp<"add", add>; -def ADCS : Addr1BinOp<"adcs", adde>; -def ADDS : Addr1BinOp<"adds", addc>; -def SUB : Addr1BinOp<"sub", sub>; -def SBCS : Addr1BinOp<"sbcs", sube>; -def SUBS : Addr1BinOp<"subs", subc>; -def AND : Addr1BinOp<"and", and>; -def EOR : Addr1BinOp<"eor", xor>; -def ORR : Addr1BinOp<"orr", or>; +defm SXTB : AI_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -let isTwoAddress = 1 in { - def movcond : InstARM<(ops IntRegs:$dst, IntRegs:$false, - op_addr_mode1:$true, CCOp:$cc), - "mov$cc $dst, $true", - [(set IntRegs:$dst, (armselect addr_mode1:$true, - IntRegs:$false, imm:$cc))]>; +defm SXTAB : AI_bin_rrot<"sxtab", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm SXTAH : AI_bin_rrot<"sxtah", + BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; - def fcpyscond : InstARM<(ops FPRegs:$dst, FPRegs:$false, - FPRegs:$true, CCOp:$cc), - "fcpys$cc $dst, $true", - [(set FPRegs:$dst, (armselect FPRegs:$true, - FPRegs:$false, imm:$cc))]>; +// TODO: SXT(A){B|H}16 - def fcpydcond : InstARM<(ops DFPRegs:$dst, DFPRegs:$false, - DFPRegs:$true, CCOp:$cc), - "fcpyd$cc $dst, $true", - [(set DFPRegs:$dst, (armselect DFPRegs:$true, - DFPRegs:$false, imm:$cc))]>; -} +// Zero extenders -def MUL : IntBinOp<"mul", mul>; +let AddedComplexity = 16 in { +defm UXTB : AI_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -let Defs = [R0] in { - def SMULL : IntBinOp<"smull r12,", mulhs>; - def UMULL : IntBinOp<"umull r12,", mulhu>; +def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 24)>; +def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 8)>; + +defm UXTAB : AI_bin_rrot<"uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm UXTAH : AI_bin_rrot<"uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } -let isTerminator = 1, isBranch = 1 in { - def bcond : InstARM<(ops brtarget:$dst, CCOp:$cc), - "b$cc $dst", - [(armbr bb:$dst, imm:$cc)]>; +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; - def b : InstARM<(ops brtarget:$dst), - "b $dst", - [(br bb:$dst)]>; -} +// TODO: UXT(A){B|H}16 + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +defm ADD : AI1_bin_irs<"add" , BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm ADDS : AI1_bin_irs<"adds", BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm ADC : AI1_bin_irs<"adc" , BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm SUB : AI1_bin_irs<"sub" , BinOpFrag<(sub node:$LHS, node:$RHS)>>; +defm SUBS : AI1_bin_irs<"subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; +defm SBC : AI1_bin_irs<"sbc" , BinOpFrag<(sube node:$LHS, node:$RHS)>>; -def cmp : InstARM<(ops IntRegs:$a, op_addr_mode1:$b), - "cmp $a, $b", - [(armcmp IntRegs:$a, addr_mode1:$b)]>; +// These don't define reg/reg forms, because they are handled above. 
+defm RSB : AI1_bin_is <"rsb" , BinOpFrag<(sub node:$RHS, node:$LHS)>>; +defm RSBS : AI1_bin_is <"rsbs", BinOpFrag<(subc node:$RHS, node:$LHS)>>; +defm RSC : AI1_bin_is <"rsc" , BinOpFrag<(sube node:$RHS, node:$LHS)>>; -// Floating Point Compare -def fcmps : InstARM<(ops FPRegs:$a, FPRegs:$b), - "fcmps $a, $b", - [(armcmp FPRegs:$a, FPRegs:$b)]>; +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; -def fcmpd : InstARM<(ops DFPRegs:$a, DFPRegs:$b), - "fcmpd $a, $b", - [(armcmp DFPRegs:$a, DFPRegs:$b)]>; +//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), +// (SUBSri GPR:$src, so_imm_neg:$imm)>; +//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), +// (SBCri GPR:$src, so_imm_neg:$imm)>; -// Floating Point Copy -def FCPYS : InstARM<(ops FPRegs:$dst, FPRegs:$src), "fcpys $dst, $src", []>; +// Note: These are implemented in C++ code, because they have to generate +// ADD/SUBrs instructions, which use a complex pattern that a xform function +// cannot produce. +// (mul X, 2^n+1) -> (add (X << n), X) +// (mul X, 2^n-1) -> (rsb X, (X << n)) -def FCPYD : InstARM<(ops DFPRegs:$dst, DFPRegs:$src), "fcpyd $dst, $src", []>; -// Floating Point Conversion -// We use bitconvert for moving the data between the register classes. -// The format conversion is done with ARM specific nodes +//===----------------------------------------------------------------------===// +// Bitwise Instructions. +// -def FMSR : InstARM<(ops FPRegs:$dst, IntRegs:$src), - "fmsr $dst, $src", [(set FPRegs:$dst, (bitconvert IntRegs:$src))]>; +defm AND : AI1_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>>; +defm ORR : AI1_bin_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>>; +defm EOR : AI1_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>; +defm BIC : AI1_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -def FMRS : InstARM<(ops IntRegs:$dst, FPRegs:$src), - "fmrs $dst, $src", [(set IntRegs:$dst, (bitconvert FPRegs:$src))]>; +defm MVN : AI1_unary_irs<"mvn", not>; -def FMRRD : InstARM<(ops IntRegs:$i0, IntRegs:$i1, DFPRegs:$src), - "fmrrd $i0, $i1, $src", [(armfmrrd IntRegs:$i0, IntRegs:$i1, DFPRegs:$src)]>; +def : ARMPat<(i32 so_imm_not:$imm), + (MVNi so_imm_not:$imm)>; -def FMDRR : InstARM<(ops DFPRegs:$dst, IntRegs:$i0, IntRegs:$i1), - "fmdrr $dst, $i0, $i1", [(set DFPRegs:$dst, (armfmdrr IntRegs:$i0, IntRegs:$i1))]>; +def : ARMPat<(and GPR:$src, so_imm_not:$imm), + (BICri GPR:$src, so_imm_not:$imm)>; -def FSITOS : InstARM<(ops FPRegs:$dst, FPRegs:$src), - "fsitos $dst, $src", [(set FPRegs:$dst, (armfsitos FPRegs:$src))]>; +//===----------------------------------------------------------------------===// +// Multiply Instructions. +// -def FTOSIS : InstARM<(ops FPRegs:$dst, FPRegs:$src), - "ftosis $dst, $src", [(set FPRegs:$dst, (armftosis FPRegs:$src))]>; +// AI_orr - Defines a (op r, r) pattern. +class AI_orr<string opc, SDNode opnode> + : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, " $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + +// AI_oorr - Defines a (op (op r, r), r) pattern. 
+class AI_oorr<string opc, SDNode opnode1, SDNode opnode2> + : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c), + !strconcat(opc, " $dst, $a, $b, $c"), + [(set GPR:$dst, (opnode1 (opnode2 GPR:$a, GPR:$b), GPR:$c))]>; + +def MUL : AI_orr<"mul", mul>; +def MLA : AI_oorr<"mla", add, mul>; + +// Extra precision multiplies with low / high results +def SMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b), + "smull $ldst, $hdst, $a, $b", + []>; + +def UMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b), + "umull $ldst, $hdst, $a, $b", + []>; + +// Multiply + accumulate +def SMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b), + "smlal $ldst, $hdst, $a, $b", + []>; + +def UMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b), + "umlal $ldst, $hdst, $a, $b", + []>; + +def UMAAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b), + "umaal $ldst, $hdst, $a, $b", + []>, Requires<[IsARM, HasV6]>; + +// Most significant word multiply +def SMMUL : AI_orr<"smmul", mulhs>, Requires<[IsARM, HasV6]>; +def SMMLA : AI_oorr<"smmla", add, mulhs>, Requires<[IsARM, HasV6]>; + + +def SMMLS : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c), + "smmls $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, + Requires<[IsARM, HasV6]>; + +multiclass AI_smul<string opc, PatFrag opnode> { + def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bb $dst, $a, $b"), + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]>; + def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bb $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16), + (sra (shl GPR:$b, 16), 16)))]>, + Requires<[IsARM, HasV5TE]>; + def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bb $dst, $a, $b"), + [(set GPR:$dst, (opnode sext_16_node:$a, sext_16_node:$b))]>, + Requires<[IsARM, HasV5TE]>; + + def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bt $dst, $a, $b"), + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]>; + def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bt $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]>; + def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "bt $dst, $a, $b"), + [(set GPR:$dst, (opnode sext_16_node:$a, (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]>; + + def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "tb $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]>; + def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "tb $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sra (shl GPR:$b, 16), 16)))]>, + Requires<[IsARM, HasV5TE]>; + def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "tb $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra GPR:$a, 16), sext_16_node:$b))]>, + Requires<[IsARM, HasV5TE]>; + + def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "tt $dst, $a, $b"), + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]>; + + def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "wb $dst, $a, $b"), + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16))]>, + Requires<[IsARM, HasV5TE]>; + def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "wb $dst, $a, $b"), + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra (shl GPR:$b, 16), 16)), 16))]>, + 
Requires<[IsARM, HasV5TE]>; + def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "wb $dst, $a, $b"), + [(set GPR:$dst, (sra (opnode GPR:$a, sext_16_node:$b), 16))]>, + Requires<[IsARM, HasV5TE]>; + + def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b), + !strconcat(opc, "wt $dst, $a, $b"), + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16))]>, + Requires<[IsARM, HasV5TE]>; +} -def FSITOD : InstARM<(ops DFPRegs:$dst, FPRegs:$src), - "fsitod $dst, $src", [(set DFPRegs:$dst, (armfsitod FPRegs:$src))]>; +multiclass AI_smla<string opc, PatFrag opnode> { + def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]>; + def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, + (opnode (sra (shl GPR:$a, 16), 16), + (sra (shl GPR:$b, 16), 16))))]>, + Requires<[IsARM, HasV5TE]>; + def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, + (opnode sext_16_node:$a, sext_16_node:$b)))]>, + Requires<[IsARM, HasV5TE]>; + + def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]>; + def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra (shl GPR:$a, 16), 16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]>; + def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode sext_16_node:$a, + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]>; + + def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]>; + def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sra (shl GPR:$b, 16), 16))))]>, + Requires<[IsARM, HasV5TE]>; + def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + sext_16_node:$b)))]>, + Requires<[IsARM, HasV5TE]>; + + def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tt $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]>; + + def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16)))]>, + Requires<[IsARM, HasV5TE]>; + def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra (shl GPR:$b, 16), 16)), 16)))]>, + Requires<[IsARM, HasV5TE]>; + def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb $dst, $a, $b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + sext_16_node:$b), 16)))]>, + Requires<[IsARM, HasV5TE]>; + + def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wt $dst, $a, 
$b, $acc"), + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16)))]>, + Requires<[IsARM, HasV5TE]>; +} -def FTOSID : InstARM<(ops FPRegs:$dst, DFPRegs:$src), - "ftosid $dst, $src", [(set FPRegs:$dst, (armftosid DFPRegs:$src))]>; +defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -def FUITOS : InstARM<(ops FPRegs:$dst, FPRegs:$src), - "fuitos $dst, $src", [(set FPRegs:$dst, (armfuitos FPRegs:$src))]>; +// TODO: Halfword multiple accumulate long: SMLAL<x><y> +// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -def FTOUIS : InstARM<(ops FPRegs:$dst, FPRegs:$src), - "ftouis $dst, $src", [(set FPRegs:$dst, (armftouis FPRegs:$src))]>; +//===----------------------------------------------------------------------===// +// Misc. Arithmetic Instructions. +// -def FUITOD : InstARM<(ops DFPRegs:$dst, FPRegs:$src), - "fuitod $dst, $src", [(set DFPRegs:$dst, (armfuitod FPRegs:$src))]>; +def CLZ : AI<(ops GPR:$dst, GPR:$src), + "clz $dst, $src", + [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]>; + +def REV : AI<(ops GPR:$dst, GPR:$src), + "rev $dst, $src", + [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]>; + +def REV16 : AI<(ops GPR:$dst, GPR:$src), + "rev16 $dst, $src", + [(set GPR:$dst, + (or (and (srl GPR:$src, 8), 0xFF), + (or (and (shl GPR:$src, 8), 0xFF00), + (or (and (srl GPR:$src, 8), 0xFF0000), + (and (shl GPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsARM, HasV6]>; + +def REVSH : AI<(ops GPR:$dst, GPR:$src), + "revsh $dst, $src", + [(set GPR:$dst, + (sext_inreg + (or (srl (and GPR:$src, 0xFFFF), 8), + (shl GPR:$src, 8)), i16))]>, + Requires<[IsARM, HasV6]>; + +def PKHBT : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhbt $dst, $src1, $src2, LSL $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), + (and (shl GPR:$src2, (i32 imm:$shamt)), + 0xFFFF0000)))]>, + Requires<[IsARM, HasV6]>; + +// Alternate cases for PKHBT where identities eliminate some nodes. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), + (PKHBT GPR:$src1, GPR:$src2, 0)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), + (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + + +def PKHTB : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhtb $dst, $src1, $src2, ASR $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), + (and (sra GPR:$src2, imm16_31:$shamt), + 0xFFFF)))]>, Requires<[IsARM, HasV6]>; + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)), + (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), + (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; -def FTOUID : InstARM<(ops FPRegs:$dst, DFPRegs:$src), - "ftouid $dst, $src", [(set FPRegs:$dst, (armftouid DFPRegs:$src))]>; -def FCVTDS : InstARM<(ops DFPRegs:$dst, FPRegs:$src), - "fcvtds $dst, $src", [(set DFPRegs:$dst, (fextend FPRegs:$src))]>; +//===----------------------------------------------------------------------===// +// Comparison Instructions... 
+// + +defm CMP : AI1_bin0_irs<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm CMN : AI1_bin0_irs<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; + +def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), + (CMNri GPR:$src, so_imm_neg:$imm)>; + +// Note that TST/TEQ don't set all the same flags that CMP does! +def TSTrr : AI1<(ops GPR:$a, so_reg:$b), "tst $a, $b", []>; +def TSTri : AI1<(ops GPR:$a, so_imm:$b), "tst $a, $b", []>; +def TEQrr : AI1<(ops GPR:$a, so_reg:$b), "teq $a, $b", []>; +def TEQri : AI1<(ops GPR:$a, so_imm:$b), "teq $a, $b", []>; + +// Conditional moves +def MOVCCr : AI<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc), + "mov$cc $dst, $true", + [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>, + RegConstraint<"$false = $dst">; + +def MOVCCs : AI<(ops GPR:$dst, GPR:$false, so_reg:$true, CCOp:$cc), + "mov$cc $dst, $true", + [(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true,imm:$cc))]>, + RegConstraint<"$false = $dst">; + +def MOVCCi : AI<(ops GPR:$dst, GPR:$false, so_imm:$true, CCOp:$cc), + "mov$cc $dst, $true", + [(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true,imm:$cc))]>, + RegConstraint<"$false = $dst">; + + +// LEApcrel - Load a pc-relative address into a register without offending the +// assembler. +def LEApcrel : AI1<(ops GPR:$dst, i32imm:$label), + !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add $dst, pc, #PCRELV${:uid}")), + []>; + +def LEApcrelJT : AI1<(ops GPR:$dst, i32imm:$label, i32imm:$id), + !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add $dst, pc, #PCRELV${:uid}")), + []>; -def FCVTSD : InstARM<(ops FPRegs:$dst, DFPRegs:$src), - "fcvtsd $dst, $src", [(set FPRegs:$dst, (fround DFPRegs:$src))]>; +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// -def FMSTAT : InstARM<(ops ), "fmstat", [(armfmstat)]>; +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; -// Floating Point Arithmetic -def FADDS : FPBinOp<"fadds", fadd>; -def FADDD : DFPBinOp<"faddd", fadd>; -def FSUBS : FPBinOp<"fsubs", fsub>; -def FSUBD : DFPBinOp<"fsubd", fsub>; +// Large immediate handling. -def FNEGS : FPUnaryOp<"fnegs", fneg>; -def FNEGD : DFPUnaryOp<"fnegd", fneg>; -def FABSS : FPUnaryOp<"fabss", fabs>; -def FABSD : DFPUnaryOp<"fabsd", fabs>; +// Two piece so_imms. +def : ARMPat<(i32 so_imm2part:$src), + (ORRri (MOVri (so_imm2part_1 imm:$src)), + (so_imm2part_2 imm:$src))>; -def FMULS : FPBinOp<"fmuls", fmul>; -def FMULD : DFPBinOp<"fmuld", fmul>; -def FDIVS : FPBinOp<"fdivs", fdiv>; -def FDIVD : DFPBinOp<"fdivd", fdiv>; +def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), + (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), + (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; -// Floating Point Load -def FLDS : InstARM<(ops FPRegs:$dst, op_addr_mode5:$addr), - "flds $dst, $addr", - [(set FPRegs:$dst, (load addr_mode5:$addr))]>; +// TODO: add,sub,and, 3-instr forms? 
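+
+// A worked example of the two-piece split (value chosen for illustration):
+// 0x00FF00FF is not a valid so_imm, but it decomposes into the rotated
+// immediates 0x00FF0000 and 0x000000FF, i.e.
+//   mov r0, #0x00FF0000
+//   orr r0, r0, #0x000000FF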
-def FLDD : InstARM<(ops DFPRegs:$dst, op_addr_mode5:$addr), - "fldd $dst, $addr", - [(set DFPRegs:$dst, (load addr_mode5:$addr))]>; -// Floating Point Store -def FSTS : InstARM<(ops FPRegs:$src, op_addr_mode5:$addr), - "fsts $src, $addr", - [(store FPRegs:$src, addr_mode5:$addr)]>; +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; -def FSTD : InstARM<(ops DFPRegs:$src, op_addr_mode5:$addr), - "fstd $src, $addr", - [(store DFPRegs:$src, addr_mode5:$addr)]>; +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; -def : Pat<(ARMcall tglobaladdr:$dst), - (bl tglobaladdr:$dst)>; +// extload -> zextload +def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; -def : Pat<(ARMcall texternalsym:$dst), - (bl texternalsym:$dst)>; +// truncstore i1 -> truncstore i8 +def : Pat<(truncstorei1 GPR:$src, addrmode2:$dst), + (STRB GPR:$src, addrmode2:$dst)>; +def : Pat<(pre_truncsti1 GPR:$src, GPR:$base, am2offset:$offset), + (STRB_PRE GPR:$src, GPR:$base, am2offset:$offset)>; +def : Pat<(post_truncsti1 GPR:$src, GPR:$base, am2offset:$offset), + (STRB_POST GPR:$src, GPR:$base, am2offset:$offset)>; -def : Pat<(extloadi8 IntRegs:$addr), - (LDRB IntRegs:$addr)>; -def : Pat<(extloadi16 IntRegs:$addr), - (LDRH IntRegs:$addr)>; +//===----------------------------------------------------------------------===// +// Thumb Support +// -// extload bool -> extload byte -def : Pat<(extloadi1 IntRegs:$addr), (LDRB IntRegs:$addr)>; +include "ARMInstrThumb.td" -// zextload bool -> zextload byte -def : Pat<(i32 (zextloadi1 IntRegs:$addr)), (LDRB IntRegs:$addr)>; +//===----------------------------------------------------------------------===// +// Floating Point Support +// -// truncstore bool -> truncstore byte. -def : Pat<(truncstorei1 IntRegs:$src, IntRegs:$addr), - (STRB IntRegs:$src, IntRegs:$addr)>; +include "ARMInstrVFP.td" diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td new file mode 100644 index 0000000000..58cef04188 --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -0,0 +1,513 @@ +//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Thumb specific DAG Nodes. +// + +def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +// TI - Thumb instruction. + +// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. +class ThumbPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb]; +} + +class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb, HasV5T]; +} + +class ThumbI<dag ops, AddrMode am, SizeFlagVal sz, + string asm, string cstr, list<dag> pattern> + // FIXME: Set all opcodes to 0 for now. 
+ : InstARM<0, am, sz, IndexModeNone, ops, asm, cstr> { + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb]; +} + +class TI<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>; +class TI1<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>; +class TI2<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>; +class TI4<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>; +class TIs<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>; + +// Two-address instructions +class TIt<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>; + +// BL, BLX(1) are translated by assembler into two instructions +class TIx2<dag ops, string asm, list<dag> pattern> + : ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>; + +def imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32); +}]>; +def imm_comp_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32); +}]>; + + +/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. +def imm0_7 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getValue() < 8; +}]>; +def imm0_7_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getValue() < 8; +}], imm_neg_XFORM>; + +def imm0_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getValue() < 256; +}]>; +def imm0_255_comp : PatLeaf<(i32 imm), [{ + return ~((uint32_t)N->getValue()) < 256; +}]>; + +def imm8_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256; +}]>; +def imm8_255_neg : PatLeaf<(i32 imm), [{ + unsigned Val = -N->getValue(); + return Val >= 8 && Val < 256; +}], imm_neg_XFORM>; + +// Break imm's up into two pieces: an immediate + a left shift. +// This uses thumb_immshifted to match and thumb_immshifted_val and +// thumb_immshifted_shamt to get the val/shift pieces. +def thumb_immshifted : PatLeaf<(imm), [{ + return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue()); +}]>; + +def thumb_immshifted_val : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def thumb_immshifted_shamt : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +// Define Thumb specific addressing modes. 
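+// (Illustrative uses of the forms defined below: "ldr r0, [r1, r2]" is
+// t_addrmode_rr, "ldr r0, [r1, #4]" is t_addrmode_ri5_4, and
+// "ldr r0, [sp, #8]" is t_addrmode_sp.)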
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
+}
+
+// t_addrmode_ri5_{1|2|4} := reg + imm5 * {1|2|4}
+//
+def t_addrmode_ri5_1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_1", []> {
+ let PrintMethod = "printThumbAddrModeRI5_1Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+def t_addrmode_ri5_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_2", []> {
+ let PrintMethod = "printThumbAddrModeRI5_2Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+def t_addrmode_ri5_4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_4", []> {
+ let PrintMethod = "printThumbAddrModeRI5_4Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
+ "\n$cp:\n\tadd $dst, pc",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+ def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
+
+// FIXME: remove when we have a way to mark a MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isCall = 1, noResults = 1,
+ Defs = [R0, R1, R2, R3, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def tBL : TIx2<(ops i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
+ def tBLXr : TI<(ops GPR:$dst, variable_ops),
+ "blx $dst",
+ [(ARMtcall GPR:$dst)]>, Requires<[HasV5T]>;
+ // ARMv4T
+ def tBX : TIx2<(ops GPR:$dst, variable_ops),
+ "cpy lr, pc\n\tbx $dst",
+ [(ARMcall_nolink GPR:$dst)]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+ def tB : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
+
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
+ def tBcc : TI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
+ [(ARMbrcond bb:$dst, imm:$cc)]>;
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+let isLoad = 1 in {
+def tLDRri : TI4<(ops GPR:$dst, t_addrmode_ri5_4:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_ri5_4:$addr))]>;
+
+def tLDRrr : TI<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_rr:$addr))]>;
+// def tLDRpci
+def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
+
+def tLDRBri : TI1<(ops GPR:$dst, t_addrmode_ri5_1:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 t_addrmode_ri5_1:$addr))]>;
+
+def tLDRBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRHri : TI2<(ops GPR:$dst, t_addrmode_ri5_2:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 t_addrmode_ri5_2:$addr))]>;
+
+def tLDRHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 t_addrmode_rr:$addr))]>;
+
+def tLDRSBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRSHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+} // isLoad
+
+let isStore = 1 in {
+def tSTRri : TI4<(ops GPR:$src, t_addrmode_ri5_4:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_ri5_4:$addr)]>;
+
+def tSTRrr : TI<(ops GPR:$src, t_addrmode_rr:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_rr:$addr)]>;
+
+def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_sp:$addr)]>;
+
+def tSTRBri : TI1<(ops GPR:$src, t_addrmode_ri5_1:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, t_addrmode_ri5_1:$addr)]>;
+
+def tSTRBrr : TI1<(ops GPR:$src, t_addrmode_rr:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, t_addrmode_rr:$addr)]>;
+
+def tSTRHri : TI2<(ops GPR:$src, t_addrmode_ri5_2:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, t_addrmode_ri5_2:$addr)]>;
+
+def tSTRHrr : TI2<(ops GPR:$src, t_addrmode_rr:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, t_addrmode_rr:$addr)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// TODO: A7-44: LDMIA - load multiple
+
+let isLoad = 1 in
+def tPOP : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isStore = 1 in
+def tPUSH : TI<(ops reglist:$src1, variable_ops),
+ "push $src1", []>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+// + +def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>; + +def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "add $dst, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>; + +def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "add $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>; + +def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "add $dst, $rhs", []>; + +def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs), + "add $dst, pc, $rhs * 4", []>; +def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs), + "add $dst, $sp, $rhs * 4", []>; +def tADDspi : TI<(ops GPR:$sp, i32imm:$rhs), + "add $sp, $rhs * 4", []>; + + +def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "and $dst, $rhs", + [(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>; + +def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "asr $dst, $lhs, $rhs", + [(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>; + +def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "asr $dst, $rhs", + [(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>; + +def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "bic $dst, $rhs", + [(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>; + + +def tCMN : TI<(ops GPR:$lhs, GPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>; + +def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp GPR:$lhs, imm0_255:$rhs)]>; + +def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp GPR:$lhs, GPR:$rhs)]>; + +// TODO: A7-37: CMP(3) - cmp hi regs + +def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "eor $dst, $rhs", + [(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>; + +def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "lsl $dst, $lhs, $rhs", + [(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>; + +def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "lsl $dst, $rhs", + [(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>; + +def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "lsr $dst, $lhs, $rhs", + [(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>; + +def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "lsr $dst, $rhs", + [(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>; + +def tMOVri8 : TI<(ops GPR:$dst, i32imm:$src), + "mov $dst, $src", + [(set GPR:$dst, imm0_255:$src)]>; + +// TODO: A7-73: MOV(2) - mov setting flag. + + +// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', +// which is MOV(3). This also supports high registers. 
+def tMOVrr : TI<(ops GPR:$dst, GPR:$src), + "cpy $dst, $src", []>; + +def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "mul $dst, $rhs", + [(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>; + +def tMVN : TI<(ops GPR:$dst, GPR:$src), + "mvn $dst, $src", + [(set GPR:$dst, (not GPR:$src))]>; + +def tNEG : TI<(ops GPR:$dst, GPR:$src), + "neg $dst, $src", + [(set GPR:$dst, (ineg GPR:$src))]>; + +def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "orr $dst, $rhs", + [(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>; + + +def tREV : TI<(ops GPR:$dst, GPR:$src), + "rev $dst, $src", + [(set GPR:$dst, (bswap GPR:$src))]>, + Requires<[IsThumb, HasV6]>; + +def tREV16 : TI<(ops GPR:$dst, GPR:$src), + "rev16 $dst, $src", + [(set GPR:$dst, + (or (and (srl GPR:$src, 8), 0xFF), + (or (and (shl GPR:$src, 8), 0xFF00), + (or (and (srl GPR:$src, 8), 0xFF0000), + (and (shl GPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsThumb, HasV6]>; + +def tREVSH : TI<(ops GPR:$dst, GPR:$src), + "revsh $dst, $src", + [(set GPR:$dst, + (sext_inreg + (or (srl (and GPR:$src, 0xFFFF), 8), + (shl GPR:$src, 8)), i16))]>, + Requires<[IsThumb, HasV6]>; + +def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "ror $dst, $rhs", + [(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>; + +def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "sbc $dst, $rhs", + [(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>; + +// TODO: A7-96: STMIA - store multiple. + +def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>; + +def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs", + [(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>; + +def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>; + +def tSUBspi : TI<(ops GPR:$sp, i32imm:$rhs), + "sub $sp, $rhs * 4", []>; + +def tSXTB : TI<(ops GPR:$dst, GPR:$src), + "sxtb $dst, $src", + [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>, + Requires<[IsThumb, HasV6]>; +def tSXTH : TI<(ops GPR:$dst, GPR:$src), + "sxth $dst, $src", + [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>, + Requires<[IsThumb, HasV6]>; + +// TODO: A7-122: TST - test. + +def tUXTB : TI<(ops GPR:$dst, GPR:$src), + "uxtb $dst, $src", + [(set GPR:$dst, (and GPR:$src, 0xFF))]>, + Requires<[IsThumb, HasV6]>; +def tUXTH : TI<(ops GPR:$dst, GPR:$src), + "uxth $dst, $src", + [(set GPR:$dst, (and GPR:$src, 0xFFFF))]>, + Requires<[IsThumb, HasV6]>; + + +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Expanded by the scheduler into a branch sequence. +let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. + def tMOVCCr : + PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc), + "@ tMOVCCr $cc", + [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>; + +// tLEApcrel - Load a pc-relative address into a register without offending the +// assembler. 
+def tLEApcrel : TI<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def tLEApcrelCall : TI<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label:call}-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress
+def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+def : ThumbPat<(ARMWrapperCall tglobaladdr :$dst),
+ (tLEApcrelCall tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapperCall texternalsym:$dst),
+ (tLEApcrelCall texternalsym:$dst)>;
+
+// Direct calls
+def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+
+// Indirect calls to ARM routines
+def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
+
+// zextload i1 -> zextload i8
+def : ThumbPat<(zextloadi1 t_addrmode_ri5_1:$addr),
+ (tLDRBri t_addrmode_ri5_1:$addr)>;
+def : ThumbPat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBrr t_addrmode_rr:$addr)>;
+
+// truncstore i1 -> truncstore i8
+def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_ri5_1:$dst),
+ (tSTRBri GPR:$src, t_addrmode_ri5_1:$dst)>;
+def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_rr:$dst),
+ (tSTRBrr GPR:$src, t_addrmode_rr:$dst)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : ThumbPat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVri8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : ThumbPat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVri8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000000..cac8e4465c
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,359 @@
+//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// ARM Float Instruction
+class ASI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ASI5<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+// ARM Double Instruction
+class ADI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ADI5<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+} + +def SDT_FTOI : +SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; +def SDT_ITOF : +SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDT_CMPFP0 : +SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_FMDRR : +SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; + +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; +def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTRet, [SDNPInFlag,SDNPOutFlag]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>; +def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +let isLoad = 1 in { +def FLDD : ADI5<(ops DPR:$dst, addrmode5:$addr), + "fldd $dst, $addr", + [(set DPR:$dst, (load addrmode5:$addr))]>; + +def FLDS : ASI5<(ops SPR:$dst, addrmode5:$addr), + "flds $dst, $addr", + [(set SPR:$dst, (load addrmode5:$addr))]>; +} // isLoad + +let isStore = 1 in { +def FSTD : ADI5<(ops DPR:$src, addrmode5:$addr), + "fstd $src, $addr", + [(store DPR:$src, addrmode5:$addr)]>; + +def FSTS : ASI5<(ops SPR:$src, addrmode5:$addr), + "fsts $src, $addr", + [(store SPR:$src, addrmode5:$addr)]>; +} // isStore + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let isLoad = 1 in { +def FLDMD : ADI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops), + "fldm${addr:submode}d ${addr:base}, $dst1", + []>; + +def FLDMS : ASI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops), + "fldm${addr:submode}s ${addr:base}, $dst1", + []>; +} // isLoad + +let isStore = 1 in { +def FSTMD : ADI5<(ops addrmode5:$addr, reglist:$src1, variable_ops), + "fstm${addr:submode}d ${addr:base}, $src1", + []>; + +def FSTMS : ASI5<(ops addrmode5:$addr, reglist:$src1, variable_ops), + "fstm${addr:submode}s ${addr:base}, $src1", + []>; +} // isStore + +// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores + +//===----------------------------------------------------------------------===// +// FP Binary Operations. 
+// + +def FADDD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b), + "faddd $dst, $a, $b", + [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; + +def FADDS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b), + "fadds $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; + +def FCMPED : ADI<(ops DPR:$a, DPR:$b), + "fcmped $a, $b", + [(arm_cmpfp DPR:$a, DPR:$b)]>; + +def FCMPES : ASI<(ops SPR:$a, SPR:$b), + "fcmpes $a, $b", + [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def FDIVD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b), + "fdivd $dst, $a, $b", + [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; + +def FDIVS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b), + "fdivs $dst, $a, $b", + [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; + +def FMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b), + "fmuld $dst, $a, $b", + [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; + +def FMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b), + "fmuls $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; + + +def FNMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b), + "fnmuld $dst, $a, $b", + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>; + +def FNMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b), + "fnmuls $dst, $a, $b", + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>; + +def FSUBD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b), + "fsubd $dst, $a, $b", + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>; + +def FSUBS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b), + "fsubs $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>; + +//===----------------------------------------------------------------------===// +// FP Unary Operations. +// + +def FABSD : ADI<(ops DPR:$dst, DPR:$a), + "fabsd $dst, $a", + [(set DPR:$dst, (fabs DPR:$a))]>; + +def FABSS : ASI<(ops SPR:$dst, SPR:$a), + "fabss $dst, $a", + [(set SPR:$dst, (fabs SPR:$a))]>; + +def FCMPEZD : ADI<(ops DPR:$a), + "fcmpezd $a", + [(arm_cmpfp0 DPR:$a)]>; + +def FCMPEZS : ASI<(ops SPR:$a), + "fcmpezs $a", + [(arm_cmpfp0 SPR:$a)]>; + +def FCVTDS : ADI<(ops DPR:$dst, SPR:$a), + "fcvtds $dst, $a", + [(set DPR:$dst, (fextend SPR:$a))]>; + +def FCVTSD : ADI<(ops SPR:$dst, DPR:$a), + "fcvtsd $dst, $a", + [(set SPR:$dst, (fround DPR:$a))]>; + +def FCPYD : ADI<(ops DPR:$dst, DPR:$a), + "fcpyd $dst, $a", + [/*(set DPR:$dst, DPR:$a)*/]>; + +def FCPYS : ASI<(ops SPR:$dst, SPR:$a), + "fcpys $dst, $a", + [/*(set SPR:$dst, SPR:$a)*/]>; + +def FNEGD : ADI<(ops DPR:$dst, DPR:$a), + "fnegd $dst, $a", + [(set DPR:$dst, (fneg DPR:$a))]>; + +def FNEGS : ASI<(ops SPR:$dst, SPR:$a), + "fnegs $dst, $a", + [(set SPR:$dst, (fneg SPR:$a))]>; + +def FSQRTD : ADI<(ops DPR:$dst, DPR:$a), + "fsqrtd $dst, $a", + [(set DPR:$dst, (fsqrt DPR:$a))]>; + +def FSQRTS : ASI<(ops SPR:$dst, SPR:$a), + "fsqrts $dst, $a", + [(set SPR:$dst, (fsqrt SPR:$a))]>; + +//===----------------------------------------------------------------------===// +// FP <-> GPR Copies. Int <-> FP Conversions. 
+//
+
+def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD),
+ "@ IMPLICIT_DEF_SPR $rD",
+ [(set SPR:$rD, (undef))]>;
+def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD),
+ "@ IMPLICIT_DEF_DPR $rD",
+ [(set DPR:$rD, (undef))]>;
+
+def FMRS : ASI<(ops GPR:$dst, SPR:$src),
+ "fmrs $dst, $src",
+ [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : ASI<(ops SPR:$dst, GPR:$src),
+ "fmsr $dst, $src",
+ [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+
+def FMRRD : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
+ "fmrrd $dst1, $dst2, $src",
+ [/* FIXME: Can't write pattern for multiple result instr*/]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
+ "fmdrr $dst, $src1, $src2",
+ [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+
+def FMSTAT : ASI<(ops), "fmstat", [(arm_fmstat)]>;
+
+// FMXR: GPR -> VFP System reg
+
+
+// Int to FP:
+
+def FSITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fsitod $dst, $a",
+ [(set DPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FSITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fsitos $dst, $a",
+ [(set SPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FUITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fuitod $dst, $a",
+ [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fuitos $dst, $a",
+ [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+
+def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftosizd $dst, $a",
+ [(set SPR:$dst, (arm_ftosi DPR:$a))]>;
+
+def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftosizs $dst, $a",
+ [(set SPR:$dst, (arm_ftosi SPR:$a))]>;
+
+def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftouizd $dst, $a",
+ [(set SPR:$dst, (arm_ftoui DPR:$a))]>;
+
+def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftouizs $dst, $a",
+ [(set SPR:$dst, (arm_ftoui SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+// + +def FMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b), + "fmacd $dst, $a, $b", + [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b), + "fmacs $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b), + "fmscd $dst, $a, $b", + [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b), + "fmscs $dst, $a, $b", + [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b), + "fnmacd $dst, $a, $b", + [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b), + "fnmacs $dst, $a, $b", + [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b), + "fnmscd $dst, $a, $b", + [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b), + "fnmscs $dst, $a, $b", + [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +//===----------------------------------------------------------------------===// +// FP Conditional moves. +// + +def FCPYDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc), + "fcpyd$cc $dst, $true", + [(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))]>, + RegConstraint<"$false = $dst">; + +def FCPYScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc), + "fcpys$cc $dst, $true", + [(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))]>, + RegConstraint<"$false = $dst">; + +def FNEGDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc), + "fnegd$cc $dst, $true", + [(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))]>, + RegConstraint<"$false = $dst">; + +def FNEGScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc), + "fnegs$cc $dst, $true", + [(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))]>, + RegConstraint<"$false = $dst">; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp new file mode 100644 index 0000000000..704898e8aa --- /dev/null +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -0,0 +1,628 @@ +//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Evan Cheng and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs load / store related peephole +// optimizations. This pass should be run after register allocation. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-ldst-opt" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMRegisterInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +STATISTIC(NumLDMGened , "Number of ldm instructions generated"); +STATISTIC(NumSTMGened , "Number of stm instructions generated"); +STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); +STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); + +namespace { + struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { + const TargetInstrInfo *TII; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM load / store optimization pass"; + } + + private: + struct MemOpQueueEntry { + int Offset; + unsigned Position; + MachineBasicBlock::iterator MBBI; + bool Merged; + MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) + : Offset(o), Position(p), MBBI(i), Merged(false) {}; + }; + typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; + typedef MemOpQueue::iterator MemOpQueueIter; + + SmallVector<MachineBasicBlock::iterator, 4> + MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, + int Opcode, unsigned Size, MemOpQueue &MemOps); + + bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); + bool MergeReturnIntoLDM(MachineBasicBlock &MBB); + }; +} + +/// createARMLoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. +FunctionPass *llvm::createARMLoadStoreOptimizationPass() { + return new ARMLoadStoreOpt(); +} + +static int getLoadStoreMultipleOpcode(int Opcode) { + switch (Opcode) { + case ARM::LDR: + NumLDMGened++; + return ARM::LDM; + case ARM::STR: + NumSTMGened++; + return ARM::STM; + case ARM::FLDS: + NumFLDMGened++; + return ARM::FLDMS; + case ARM::FSTS: + NumFSTMGened++; + return ARM::FSTMS; + case ARM::FLDD: + NumFLDMGened++; + return ARM::FLDMD; + case ARM::FSTD: + NumFSTMGened++; + return ARM::FSTMD; + default: abort(); + } + return 0; +} + +/// mergeOps - Create and insert a LDM or STM with Base as base register and +/// registers in Regs as the register operands that would be loaded / stored. +/// It returns true if the transformation is done. +static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + int Offset, unsigned Base, int Opcode, + SmallVector<unsigned, 8> &Regs, + const TargetInstrInfo *TII) { + // Only a single register to load / store. Don't bother. + unsigned NumRegs = Regs.size(); + if (NumRegs <= 1) + return false; + + ARM_AM::AMSubMode Mode = ARM_AM::ia; + bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + if (isAM4 && Offset == 4) + Mode = ARM_AM::ib; + else if (isAM4 && Offset == -4 * (int)NumRegs + 4) + Mode = ARM_AM::da; + else if (isAM4 && Offset == -4 * (int)NumRegs) + Mode = ARM_AM::db; + else if (Offset != 0) { + // If starting offset isn't zero, insert a MI to materialize a new base. + // But only do so if it is cost effective, i.e. merging more than two + // loads / stores. 
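+ // For instance (a sketch of what the code below emits): loads at
+ // [r0, #16], [r0, #20], [r0, #24] can become
+ //   add r6, r0, #16
+ //   ldmia r6, {r4, r5, r6}
+ // reusing the last destination register as the new base.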
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (Opcode == ARM::LDR)
+ // If it is a load, then just use one of the destination registers
+ // as the new base.
+ NewBase = Regs[NumRegs-1];
+ else {
+ // FIXME: Try scavenging a register to use as a new base.
+ NewBase = ARM::R12;
+ }
+ int BaseOpc = ARM::ADDri;
+ if (Offset < 0) {
+ BaseOpc = ARM::SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ return false; // Probably not worth it then.
+ BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase).addReg(Base).addImm(ImmedOffset);
+ Base = NewBase;
+ }
+
+ bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+ bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ Opcode = getLoadStoreMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = (isAM4)
+ ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+ .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs));
+ for (unsigned i = 0; i != NumRegs; ++i)
+ MIB = MIB.addReg(Regs[i], isDef);
+
+ return true;
+}
+
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB,
+ unsigned SIndex, unsigned Base, int Opcode,
+ unsigned Size, MemOpQueue &MemOps) {
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned Pos = MemOps[SIndex].Position;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ SmallVector<unsigned, 8> Regs;
+ unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+ Regs.push_back(PReg);
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ // AM4 - register numbers in ascending order.
+ // AM5 - consecutive register numbers in ascending order.
+ if (NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ Offset += Size;
+ Regs.push_back(Reg);
+ PRegNum = RegNum;
+ } else {
+ // Can't merge this in. Try to merge the earlier ones first.
+ if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned j = SIndex; j < i; ++j) {
+ MBB.erase(MemOps[j].MBBI);
+ MemOps[j].Merged = true;
+ }
+ }
+ SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, MemOps);
+ Merges.append(Merges2.begin(), Merges2.end());
+ return Merges;
+ }
+
+ if (MemOps[i].Position > Pos) {
+ Pos = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+ MBB.erase(MemOps[i].MBBI);
+ MemOps[i].Merged = true;
+ }
+ }
+
+ return Merges;
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes) {
+ return (MI && MI->getOpcode() == ARM::SUBri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes) {
+ return (MI && MI->getOpcode() == ARM::ADDri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDR:
+ case ARM::STR:
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return 4;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return 8;
+ case ARM::LDM:
+ case ARM::STM:
+ return (MI->getNumOperands() - 2) * 4;
+ case ARM::FLDMS:
+ case ARM::FSTMS:
+ case ARM::FLDMD:
+ case ARM::FSTMD:
+ return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+ if (isAM4) {
+ if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ // Can't use the updating AM4 sub-mode if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ } else {
+ // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingIncrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::STR: return ARM::STR_PRE;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_POST;
+ case ARM::STR: return ARM::STR_POST;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+ (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ return false;
+
+ bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
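+ // e.g. a post-indexed "ldr r0, [r0], #4" would have to write both the
+ // loaded value and the updated base into r0.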
+ if (isLd && MI->getOperand(0).getReg() == Base) + return false; + + bool DoMerge = false; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + unsigned NewOpc = 0; + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (isMatchingDecrement(PrevMBBI, Base, Bytes)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) { + DoMerge = true; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) + MBB.erase(PrevMBBI); + } + + if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } else if (isMatchingIncrement(NextMBBI, Base, Bytes)) { + DoMerge = true; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) + MBB.erase(NextMBBI); + } + + if (!DoMerge) + return false; + + bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) + : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, + true, isDPR ? 2 : 1); + if (isLd) { + if (isAM2) + BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, true).addReg(Base).addReg(0).addImm(Offset); + else + BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base) + .addImm(Offset).addReg(MI->getOperand(0).getReg(), true); + } else { + if (isAM2) + BuildMI(MBB, MBBI, TII->get(NewOpc), Base).addReg(MI->getOperand(0).getReg()) + .addReg(Base).addReg(0).addImm(Offset); + else + BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base) + .addImm(Offset).addReg(MI->getOperand(0).getReg(), false); + } + MBB.erase(MBBI); + + return true; +} + +/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR +/// ops of the same base and incrementing offset into LDM / STM ops. +bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { + unsigned NumMerges = 0; + unsigned NumMemOps = 0; + MemOpQueue MemOps; + unsigned CurrBase = 0; + int CurrOpc = -1; + unsigned CurrSize = 0; + unsigned Position = 0; + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + bool Advance = false; + bool TryMerge = false; + bool Clobber = false; + + int Opcode = MBBI->getOpcode(); + bool isMemOp = false; + bool isAM2 = false; + unsigned Size = 4; + switch (Opcode) { + case ARM::LDR: + case ARM::STR: + isMemOp = + (MBBI->getOperand(1).isRegister() && MBBI->getOperand(2).getReg() == 0); + isAM2 = true; + break; + case ARM::FLDS: + case ARM::FSTS: + isMemOp = MBBI->getOperand(1).isRegister(); + break; + case ARM::FLDD: + case ARM::FSTD: + isMemOp = MBBI->getOperand(1).isRegister(); + Size = 8; + break; + } + if (isMemOp) { + unsigned Base = MBBI->getOperand(1).getReg(); + unsigned OffIdx = MBBI->getNumOperands()-1; + unsigned OffField = MBBI->getOperand(OffIdx).getImm(); + int Offset = isAM2 + ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + // Watch out for: + // r4 := ldr [r5] + // r5 := ldr [r5, #4] + // r6 := ldr [r5, #8] + // + // The second ldr has effectively broken the chain even though it + // looks like the later ldr(s) use the same base register. 
Try to
+      // merge the ldr's so far, including this one. But don't try to
+      // combine the following ldr(s).
+      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+      if (CurrBase == 0 && !Clobber) {
+        // Start of a new chain.
+        CurrBase = Base;
+        CurrOpc = Opcode;
+        CurrSize = Size;
+        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+        NumMemOps++;
+        Advance = true;
+      } else {
+        if (Clobber) {
+          TryMerge = true;
+          Advance = true;
+        }
+
+        if (CurrOpc == Opcode && CurrBase == Base) {
+          // Continue adding to the queue.
+          if (Offset > MemOps.back().Offset) {
+            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+            NumMemOps++;
+            Advance = true;
+          } else {
+            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+                 I != E; ++I) {
+              if (Offset < I->Offset) {
+                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+                NumMemOps++;
+                Advance = true;
+                break;
+              } else if (Offset == I->Offset) {
+                // Collision! This can't be merged!
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    if (Advance) {
+      ++Position;
+      ++MBBI;
+    } else
+      TryMerge = true;
+
+    if (TryMerge) {
+      if (NumMemOps > 1) {
+        SmallVector<MachineBasicBlock::iterator, 4> MBBII =
+          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, MemOps);
+        // Try folding preceding/trailing base inc/dec into the generated
+        // LDM/STM ops.
+        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
+            NumMerges++;
+        NumMerges += MBBII.size();
+      }
+
+      // Try folding preceding/trailing base inc/dec into those load/store
+      // ops that were not merged to form LDM/STM ops.
+      for (unsigned i = 0; i != NumMemOps; ++i)
+        if (!MemOps[i].Merged)
+          if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
+            NumMerges++;
+
+      CurrBase = 0;
+      CurrOpc = -1;
+      if (NumMemOps) {
+        MemOps.clear();
+        NumMemOps = 0;
+      }
+
+      // If iterator hasn't been advanced and this is not a memory op, skip it.
+      // It can't start a new chain anyway.
+      if (!Advance && !isMemOp && MBBI != E) {
+        ++Position;
+        ++MBBI;
+      }
+    }
+  }
+  return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so it directly restores the value
+/// of LR into pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+  if (MBB.empty()) return false;
+
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+    MachineInstr *PrevMI = prior(MBBI);
+    if (PrevMI->getOpcode() == ARM::LDM) {
+      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+      if (MO.getReg() == ARM::LR) {
+        PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
+        MO.setReg(ARM::PC);
+        MBB.erase(MBBI);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  TII = Fn.getTarget().getInstrInfo();
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+       ++MFI) {
+    MachineBasicBlock &MBB = *MFI;
+    Modified |= LoadStoreMultipleOpti(MBB);
+    Modified |= MergeReturnIntoLDM(MBB);
+  }
+  return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000000..4de3dbc410
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,136 @@
+//====- ARMMachineFunctionInfo.h - ARM machine function info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+  /// isThumb - True if this function is compiled under Thumb mode.
+  ///
+  bool isThumb;
+
+  /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+  ///
+  unsigned VarArgsRegSaveSize;
+
+  /// FramePtrSpilled - True if FP register is spilled. Set by
+  /// processFunctionBeforeCalleeSavedScan().
+  bool FramePtrSpilled;
+
+  /// FramePtrSpillOffset - If FramePtrSpilled, this records the frame pointer
+  /// spill stack offset.
+  unsigned FramePtrSpillOffset;
+
+  /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+  /// register spill areas. For Mac OS X:
+  ///
+  /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+  /// --------------------------------------------
+  /// GPR callee-saved (2) : r8, r10, r11
+  /// --------------------------------------------
+  /// DPR callee-saved : d8 - d15
+  unsigned GPRCS1Offset;
+  unsigned GPRCS2Offset;
+  unsigned DPRCSOffset;
+
+  /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spill
+  /// areas.
+  unsigned GPRCS1Size;
+  unsigned GPRCS2Size;
+  unsigned DPRCSSize;
+
+  /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+  /// which belong to these spill areas.
+  std::set<int> GPRCS1Frames;
+  std::set<int> GPRCS2Frames;
+  std::set<int> DPRCSFrames;
+
+  /// JumpTableUId - Unique id for jump tables.
+ /// + unsigned JumpTableUId; + +public: + ARMFunctionInfo() : + isThumb(false), + VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0), + GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {} + + ARMFunctionInfo(MachineFunction &MF) : + isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0), + GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {} + + bool isThumbFunction() const { return isThumb; } + + unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } + void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + + bool isFramePtrSpilled() const { return FramePtrSpilled; } + void setFramePtrSpilled(bool s) { FramePtrSpilled = s; } + unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } + void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } + unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } + unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } + + void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; } + void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; } + void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } + + unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } + unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + + void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } + void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + + bool isGPRCalleeSavedArea1Frame(unsigned fi) const { + return GPRCS1Frames.count(fi); + } + bool isGPRCalleeSavedArea2Frame(unsigned fi) const { + return GPRCS2Frames.count(fi); + } + bool isDPRCalleeSavedAreaFrame(unsigned fi) const { + return DPRCSFrames.count(fi); + } + + void addGPRCalleeSavedArea1Frame(unsigned fi) { + GPRCS1Frames.insert(fi); + } + void addGPRCalleeSavedArea2Frame(unsigned fi) { + GPRCS2Frames.insert(fi); + } + void addDPRCalleeSavedAreaFrame(unsigned fi) { + DPRCSFrames.insert(fi); + } + + unsigned createJumpTableUId() { + return JumpTableUId++; + } +}; +} // End llvm namespace + +#endif // ARMMACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM/ARMMul.cpp b/lib/Target/ARM/ARMMul.cpp deleted file mode 100644 index c4eeaac479..0000000000 --- a/lib/Target/ARM/ARMMul.cpp +++ /dev/null @@ -1,75 +0,0 @@ -//===-- ARMMul.cpp - Define TargetMachine for A5CRM -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by the "Instituto Nokia de Tecnologia" and -// is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// Modify the ARM multiplication instructions so that Rd{Hi,Lo} and Rm are distinct -// -//===----------------------------------------------------------------------===// - - -#include "ARM.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Compiler.h" - -using namespace llvm; - -namespace { - class VISIBILITY_HIDDEN FixMul : public MachineFunctionPass { - virtual bool runOnMachineFunction(MachineFunction &MF); - }; -} - -FunctionPass *llvm::createARMFixMulPass() { return new FixMul(); } - -bool FixMul::runOnMachineFunction(MachineFunction &MF) { - bool Changed = false; - - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - MachineBasicBlock &MBB = *BB; - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr *MI = I; - - int Op = MI->getOpcode(); - if (Op == ARM::MUL || - Op == ARM::SMULL || - Op == ARM::UMULL) { - MachineOperand &RdOp = MI->getOperand(0); - MachineOperand &RmOp = MI->getOperand(1); - MachineOperand &RsOp = MI->getOperand(2); - - unsigned Rd = RdOp.getReg(); - unsigned Rm = RmOp.getReg(); - unsigned Rs = RsOp.getReg(); - - if (Rd == Rm) { - Changed = true; - if (Rd != Rs) { - //Rd and Rm must be distinct, but Rd can be equal to Rs. - //Swap Rs and Rm - RmOp.setReg(Rs); - RsOp.setReg(Rm); - } else { - unsigned scratch = Op == ARM::MUL ? ARM::R12 : ARM::R0; - BuildMI(MBB, I, MF.getTarget().getInstrInfo()->get(ARM::MOV), - scratch).addReg(Rm).addImm(0).addImm(ARMShift::LSL); - RmOp.setReg(scratch); - } - } - } - } - } - - return Changed; -} diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 16a1a29660..5ae482babc 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -13,243 +13,1023 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMCommon.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Type.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include <algorithm> +#include <iostream> using namespace llvm; -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. 
-// -static bool hasFP(const MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); -} - -static void splitInstructionWithImmediate(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, - const TargetInstrDescriptor &TID, - unsigned DestReg, - unsigned OrigReg, - unsigned immediate){ - std::vector<unsigned> immediatePieces = splitImmediate(immediate); - std::vector<unsigned>::iterator it; - for (it=immediatePieces.begin(); it != immediatePieces.end(); ++it){ - BuildMI(BB, I, TID, DestReg).addReg(OrigReg) - .addImm(*it).addImm(0).addImm(ARMShift::LSL); +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace ARM; + switch (RegEnum) { + case R0: case S0: case D0: return 0; + case R1: case S1: case D1: return 1; + case R2: case S2: case D2: return 2; + case R3: case S3: case D3: return 3; + case R4: case S4: case D4: return 4; + case R5: case S5: case D5: return 5; + case R6: case S6: case D6: return 6; + case R7: case S7: case D7: return 7; + case R8: case S8: case D8: return 8; + case R9: case S9: case D9: return 9; + case R10: case S10: case D10: return 10; + case R11: case S11: case D11: return 11; + case R12: case S12: case D12: return 12; + case SP: case S13: case D13: return 13; + case LR: case S14: case D14: return 14; + case PC: case S15: case D15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + default: + std::cerr << "Unknown ARM register!\n"; + abort(); } } -ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii) +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), - TII(tii) { + TII(tii), STI(sti), + FramePtr(STI.useThumbBacktraces() ? 
ARM::R7 : ARM::R11) { +} + +bool ARMRegisterInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + MachineInstrBuilder MIB = BuildMI(MBB, MI, TII.get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) + MIB.addReg(CSI[i-1].getReg()); + return true; +} + +bool ARMRegisterInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + MachineInstr *PopMI = new MachineInstr(TII.get(ARM::tPOP)); + MBB.insert(MI, PopMI); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + Reg = ARM::PC; + PopMI->setInstrDescriptor(TII.get(ARM::tPOP_RET)); + MBB.erase(MI); + } + PopMI->addRegOperand(Reg, true); + } + return true; } void ARMRegisterInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, int FI, const TargetRegisterClass *RC) const { - assert (RC == ARM::IntRegsRegisterClass); - BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI).addImm(0); + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) + BuildMI(MBB, I, TII.get(ARM::tSTRspi)).addReg(SrcReg) + .addFrameIndex(FI).addImm(0); + else + BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg) + .addFrameIndex(FI).addReg(0).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + BuildMI(MBB, I, TII.get(ARM::FSTD)).addReg(SrcReg) + .addFrameIndex(FI).addImm(0); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + BuildMI(MBB, I, TII.get(ARM::FSTS)).addReg(SrcReg) + .addFrameIndex(FI).addImm(0); + } } void ARMRegisterInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - assert (RC == ARM::IntRegsRegisterClass); - BuildMI(MBB, I, TII.get(ARM::LDR), DestReg).addFrameIndex(FI).addImm(0); + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) + BuildMI(MBB, I, TII.get(ARM::tLDRspi), DestReg) + .addFrameIndex(FI).addImm(0); + else + BuildMI(MBB, I, TII.get(ARM::LDR), DestReg) + .addFrameIndex(FI).addReg(0).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + BuildMI(MBB, I, TII.get(ARM::FLDD), DestReg) + .addFrameIndex(FI).addImm(0); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + BuildMI(MBB, I, TII.get(ARM::FLDS), DestReg) + .addFrameIndex(FI).addImm(0); + } } void ARMRegisterInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *RC) const { - assert(RC == ARM::IntRegsRegisterClass || - RC == ARM::FPRegsRegisterClass || - RC == ARM::DFPRegsRegisterClass); - - if (RC == ARM::IntRegsRegisterClass) - BuildMI(MBB, I, TII.get(ARM::MOV), DestReg).addReg(SrcReg).addImm(0) - .addImm(ARMShift::LSL); - else if (RC == ARM::FPRegsRegisterClass) + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + 
const TargetRegisterClass *RC) const { + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + BuildMI(MBB, I, TII.get(AFI->isThumbFunction() ? ARM::tMOVrr : ARM::MOVrr), + DestReg).addReg(SrcReg); + } else if (RC == ARM::SPRRegisterClass) BuildMI(MBB, I, TII.get(ARM::FCPYS), DestReg).addReg(SrcReg); - else + else if (RC == ARM::DPRRegisterClass) BuildMI(MBB, I, TII.get(ARM::FCPYD), DestReg).addReg(SrcReg); + else + abort(); } -MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr* MI, - unsigned OpNum, - int FI) const { - return NULL; +MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr *MI, + unsigned OpNum, int FI) const { + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::MOVrr: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI) + .addReg(0).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(TII.get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0) + .addImm(0); + } + break; + } + case ARM::tMOVrr: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(TII.get(ARM::tSTRspi)).addReg(SrcReg).addFrameIndex(FI) + .addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(TII.get(ARM::tLDRspi), DstReg).addFrameIndex(FI) + .addImm(0); + } + break; + } + case ARM::FCPYS: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(TII.get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI) + .addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(TII.get(ARM::FLDS), DstReg).addFrameIndex(FI).addImm(0); + } + break; + } + case ARM::FCPYD: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(TII.get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI) + .addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(TII.get(ARM::FLDD), DstReg).addFrameIndex(FI).addImm(0); + } + break; + } + } + + if (NewMI) + NewMI->copyKillDeadInfo(MI); + return NewMI; } const unsigned* ARMRegisterInfo::getCalleeSavedRegs() const { static const unsigned CalleeSavedRegs[] = { - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, ARM::R11, - ARM::R14, 0 + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 }; - return CalleeSavedRegs; + + static const unsigned DarwinCalleeSavedRegs[] = { + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R9, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isDarwin() ? 
DarwinCalleeSavedRegs : CalleeSavedRegs; } const TargetRegisterClass* const * ARMRegisterInfo::getCalleeSavedRegClasses() const { static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, - &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, - &ARM::IntRegsRegClass, 0 + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 }; return CalleeSavedRegClasses; } +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. +/// +static bool hasFP(const MachineFunction &MF) { + return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitARMRegPlusImmediate - Emit a series of instructions to materialize +/// a destreg = basereg + immediate in ARM code. +static +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + // Get the properly encoded SOImmVal field. + int SOImmVal = ARM_AM::getSOImmVal(ThisVal); + assert(SOImmVal != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + BuildMI(MBB, MBBI, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) + .addReg(BaseReg).addImm(SOImmVal); + BaseReg = DestReg; + } +} + +/// isLowRegister - Returns true if the register is low register r0-r7. +/// +static bool isLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +/// emitThumbRegPlusImmediate - Emit a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + unsigned NumBits = 1; + unsigned Opc = 0; + unsigned ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + Bytes >>= 2; // Implicitly multiplied by 4. + NumBits = 7; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + Bytes >>= 2; // Implicitly multiplied by 4. 
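+    // tADDrSPi encodes an 8-bit word offset, so each instruction can add at
+    // most 255 * 4 = 1020 bytes.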
+ NumBits = 8; + Opc = ARM::tADDrSPi; + } else { + if (DestReg != BaseReg) { + if (isLowRegister(DestReg) && isLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, TII.get(ARM::tMOVrr), DestReg).addReg(BaseReg); + } + BaseReg = DestReg; + } + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned Chunk = (1 << NumBits) - 1; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addImm(ThisVal); + else { + BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addReg(BaseReg).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, TII.get(ExtraOpc), DestReg).addReg(DestReg) + .addImm(((unsigned)NumBytes) & 3); +} + +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int NumBytes, bool isThumb, const TargetInstrInfo &TII) { + if (isThumb) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII); + else + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII); +} + void ARMRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (hasFP(MF)) { + if (MF.getFrameInfo()->hasVarSizedObjects()) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr *Old = I; unsigned Amount = Old->getOperand(0).getImmedValue(); if (Amount != 0) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; + // Replace the pseudo instruction with a new instruction... if (Old->getOpcode() == ARM::ADJCALLSTACKDOWN) { - // sub sp, sp, amount - splitInstructionWithImmediate(MBB, I, TII.get(ARM::SUB), ARM::R13, - ARM::R13, Amount); + emitSPUpdate(MBB, I, -Amount, AFI->isThumbFunction(), TII); } else { - // add sp, sp, amount assert(Old->getOpcode() == ARM::ADJCALLSTACKUP); - splitInstructionWithImmediate(MBB, I, TII.get(ARM::ADD), ARM::R13, - ARM::R13, Amount); + emitSPUpdate(MBB, I, Amount, AFI->isThumbFunction(), TII); } } } MBB.erase(I); } -void -ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const { +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? 
Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, TII.get(ARM::tMOVri8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII); + if (isSub) + BuildMI(MBB, MBBI, TII.get(ARM::tNEG), DestReg).addReg(DestReg); +} + +void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{ + unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); - assert (MI.getOpcode() == ARM::LDR || - MI.getOpcode() == ARM::STR || - MI.getOpcode() == ARM::ADD); + while (!MI.getOperand(i).isFrameIndex()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getFrameIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize(); - unsigned FrameIdx = 1; - unsigned OffIdx = 2; + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDescriptor &Desc = TII.get(Opcode); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(i+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setInstrDescriptor(TII.get(ARM::MOVrr)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setInstrDescriptor(TII.get(ARM::SUBri)); + } - int FrameIndex = MI.getOperand(FrameIdx).getFrameIndex(); + // Common case: small offset, fits into instruction. + int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); + return; + } + + // Otherwise, we fallback to common code below to form the imm offset with + // a sequence of ADDri instructions. First though, pull as much of the imm + // into this ADDri as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, (32-RotAmt) & 31); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); + } else if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + assert((Offset & 3) == 0 && + "add/sub sp, #imm immediate must be multiple of 4!"); + Offset >>= 2; + if (Offset == 0) { + // Turn it into a move. + MI.setInstrDescriptor(TII.get(ARM::tMOVrr)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. 
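+    // (Offset is in words at this point, so values 0-255 here cover byte
+    // offsets up to 1020.)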
+ if ((Offset & ~255U) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(255); + Offset = (Offset - 255) << 2; + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + Offset <<= 2; + emitThumbConstant(MBB, II, DestReg, Offset, TII); + MI.setInstrDescriptor(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode2: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode5: { + ImmIdx = i+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + case ARMII::AddrModeTs: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = 8; + Scale = 4; + break; + } + default: + std::cerr << "Unsupported addressing mode!\n"; + abort(); + break; + } - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(OffIdx).getImmedValue(); + Offset += InstrOffs * Scale; + assert((Scale == 1 || (Offset & (Scale-1)) == 0) && + "Can't encode this offset!"); + if (Offset < 0) { + Offset = -Offset; + isSub = true; + } - unsigned StackSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } - Offset += StackSize; + // Otherwise, it didn't fit. Pull in what we can to simplify the immediate. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. + assert(Offset && "This code isn't needed if offset already handled!"); - assert (Offset >= 0); - unsigned BaseRegister = hasFP(MF) ? 
ARM::R11 : ARM::R13; - if (Offset < 4096) { - // Replace the FrameIndex with r13 - MI.getOperand(FrameIdx).ChangeToRegister(BaseRegister, false); - // Replace the ldr offset with Offset - MI.getOperand(OffIdx).ChangeToImmediate(Offset); + if (isThumb) { + if (TII.isLoad(Opcode)) { + // Use the destination register to materialize sp + offset. + unsigned TmpReg = MI.getOperand(0).getReg(); + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, + isSub ? -Offset : Offset, TII); + MI.getOperand(i).ChangeToRegister(TmpReg, false); + } else if (TII.isStore(Opcode)) { + // FIXME! This is horrific!!! We need register scavenging. + // Our temporary workaround has marked r3 unavailable. Of course, r3 is + // also a ABI register so it's possible that is is the register that is + // being storing here. If that's the case, we do the following: + // r12 = r2 + // Use r2 to materialize sp + offset + // str r12, r2 + // r2 = r12 + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned TmpReg = ARM::R3; + if (DestReg == ARM::R3) { + BuildMI(MBB, II, TII.get(ARM::tMOVrr), ARM::R12).addReg(ARM::R2); + TmpReg = ARM::R2; + } + emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, + isSub ? -Offset : Offset, TII); + MI.getOperand(i).ChangeToRegister(DestReg, false); + if (DestReg == ARM::R3) + BuildMI(MBB, II, TII.get(ARM::tMOVrr), ARM::R2).addReg(ARM::R12); + } else + assert(false && "Unexpected opcode!"); } else { - // Insert a set of r12 with the full address - // r12 = r13 + offset - MachineBasicBlock *MBB2 = MI.getParent(); - splitInstructionWithImmediate(*MBB2, II, TII.get(ARM::ADD), ARM::R12, - BaseRegister, Offset); - - // Replace the FrameIndex with r12 - MI.getOperand(FrameIdx).ChangeToRegister(ARM::R12, false); + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + emitARMRegPlusImmediate(MBB, II, ARM::R12, FrameReg, + isSub ? -Offset : Offset, TII); + MI.getOperand(i).ChangeToRegister(ARM::R12, false); } } void ARMRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} +processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const { + // This tells PEI to spill the FP as if it is any other callee-save register to + // take advantage the eliminateFrameIndex machinery. This also ensures it is + // spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool FramePtrSpilled = MF.getFrameInfo()->hasVarSizedObjects(); + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector<unsigned, 4> UnspilledCS1GPRs; + SmallVector<unsigned, 4> UnspilledCS2GPRs; + if (!FramePtrSpilled && NoFramePointerElim) { + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = getCalleeSavedRegs(); + const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.isPhysRegUsed(Reg)) { + Spilled = true; + FramePtrSpilled = true; + } else { + // Check alias registers too. 
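+        // (e.g. a function that only touches S16 must still spill the
+        // aliased callee-saved D8.)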
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.isPhysRegUsed(*Aliases)) { + Spilled = true; + FramePtrSpilled = true; + } + } + } + + if (CSRegClasses[i] == &ARM::GPRRegClass) { + if (Spilled) { + NumGPRSpills++; + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + } + } + + if (FramePtrSpilled) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + AFI->setFramePtrSpilled(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). + if (!LRSpilled && CS1Spilled) { + MF.changePhyRegUsed(ARM::LR, true); + NumGPRSpills++; + UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), + UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + } + + // If stack and double are 8-byte aligned and we are spilling a odd number + // of GPRs. Spill one extra callee save GPR so we won't have to pad between + // the integer and double callee save areas. + unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (CS1Spilled && !UnspilledCS1GPRs.empty()) + MF.changePhyRegUsed(UnspilledCS1GPRs.front(), true); + else + MF.changePhyRegUsed(UnspilledCS2GPRs.front(), true); + } + MF.changePhyRegUsed(FramePtr, true); + } +} + +/// Move iterator pass the next bunch of callee save load / store ops for +/// the particular spill area (1: integer area 1, 2: integer area 2, +/// 3: fp area, 0: don't care). +static void movePastCSLoadStoreOps(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int Opc, unsigned Area, + const ARMSubtarget &STI) { + while (MBBI != MBB.end() && + MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFrameIndex()) { + if (Area != 0) { + bool Done = false; + unsigned Category = 0; + switch (MBBI->getOperand(0).getReg()) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + Category = 1; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + Category = STI.isDarwin() ? 2 : 1; + break; + case ARM::D8: + case ARM::D9: + case ARM::D10: + case ARM::D11: + case ARM::D12: + case ARM::D13: + case ARM::D14: + case ARM::D15: + Category = 3; + break; + default: + Done = true; + break; + } + if (Done || Category != Area) + break; + } + + ++MBBI; + } +} void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - int NumBytes = (int) MFI->getStackSize(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned NumBytes = MFI->getStackSize(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - bool HasFP = hasFP(MF); + // Determine the sizes of each callee-save spill areas and record which frame + // belongs to which callee-save spill areas. 
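+  // Under the Darwin ABI this works out to: area 1 = {r4-r7, lr}, area 2 =
+  // {r8-r11}, DPR area = {d8-d15}; elsewhere all GPRs land in area 1.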
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + if (AFI->isFramePtrSpilled()) { + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, -VARegSaveSize, isThumb, TII); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 1. + emitSPUpdate(MBB, MBBI, -GPRCS1Size, isThumb, TII); + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); + } else { + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) + ++MBBI; + } + + // Point FP to the stack slot that contains the previous FP. + BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 2. + emitSPUpdate(MBB, MBBI, -GPRCS2Size, false, TII); + + // Build the new SUBri to adjust SP for FP callee-save spill area. + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); + emitSPUpdate(MBB, MBBI, -DPRCSSize, false, TII); + } + } - if (MFI->hasCalls()) { + // If necessary, add one more SUBri to account for the call frame + // and/or local storage, alloca area. + if (MFI->hasCalls()) // We reserve argument space for call sites in the function immediately on // entry to the current function. This eliminates the need for add/sub // brackets around call sites. - NumBytes += MFI->getMaxCallFrameSize(); - } - - if (HasFP) - // Add space for storing the FP - NumBytes += 4; - - // Align to 8 bytes - NumBytes = ((NumBytes + 7) / 8) * 8; + if (!MF.getFrameInfo()->hasVarSizedObjects()) + NumBytes += MFI->getMaxCallFrameSize(); + // Round the size to a multiple of the alignment. + NumBytes = (NumBytes+Align-1)/Align*Align; MFI->setStackSize(NumBytes); - if (NumBytes) { - //sub sp, sp, #NumBytes - splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::SUB), ARM::R13, - ARM::R13, NumBytes); - } + // Determine starting offsets of spill areas. + if (AFI->isFramePtrSpilled()) { + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. 
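+      // (That is, step past the FSTD spill stores first so the local/alloca
+      // adjustment lands below the DPR spill area.)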
+ if (!isThumb) + movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); + emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII); + } + } else + emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII); + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} - if (HasFP) { - BuildMI(MBB, MBBI, TII.get(ARM::STR)) - .addReg(ARM::R11).addReg(ARM::R13).addImm(0); - BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R11).addReg(ARM::R13).addImm(0). - addImm(ARMShift::LSL); - } +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return ((MI->getOpcode() == ARM::FLDD || + MI->getOpcode() == ARM::LDR || + MI->getOpcode() == ARM::tLDRspi) && + MI->getOperand(1).isFrameIndex() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); } void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert(MBBI->getOpcode() == ARM::bx && + assert((MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && "Can only insert epilog into returning blocks"); MachineFrameInfo *MFI = MF.getFrameInfo(); - int NumBytes = (int) MFI->getStackSize(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + if (AFI->isFramePtrSpilled()) { + // Unwind MBBI to point to first LDR / FLDD. + const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } - if (hasFP(MF)) { - BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R13).addReg(ARM::R11).addImm(0). - addImm(ARMShift::LSL); - BuildMI(MBB, MBBI, TII.get(ARM::LDR), ARM::R11).addReg(ARM::R13).addImm(0); - } + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + if (isThumb) + emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII); + else { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot. + if (AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()) + if (NumBytes) + BuildMI(MBB, MBBI, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) + .addImm(NumBytes); + else + BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tMOVrr : ARM::MOVrr), + ARM::SP).addReg(FramePtr); - if (NumBytes){ - //add sp, sp, #NumBytes - splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::ADD), ARM::R13, - ARM::R13, NumBytes); - } + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); + emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), false, TII); + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), false, TII); + + // Move SP to SP upon entry to the function. 
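+      // (i.e. bring SP back up by the area 1 size so it matches its value on
+      // entry to the function.)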
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), false, TII); + } + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, VARegSaveSize, isThumb, TII); + } else if (NumBytes != 0) { + emitSPUpdate(MBB, MBBI, NumBytes, isThumb, TII); + } } unsigned ARMRegisterInfo::getRARegister() const { - return ARM::R14; + return ARM::LR; } unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { - return hasFP(MF) ? ARM::R11 : ARM::R13; + return STI.useThumbBacktraces() ? ARM::R7 : ARM::R11; } #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 16c277ed92..6b9ac39a41 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -17,18 +17,36 @@ #include "llvm/Target/MRegisterInfo.h" #include "ARMGenRegisterInfo.h.inc" +#include <set> namespace llvm { - -class Type; -class TargetInstrInfo; + class TargetInstrInfo; + class ARMSubtarget; + class Type; struct ARMRegisterInfo : public ARMGenRegisterInfo { const TargetInstrInfo &TII; + const ARMSubtarget &STI; +private: + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; + +public: + ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); - ARMRegisterInfo(const TargetInstrInfo &tii); + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// ARM::LR, return the number that it corresponds to (e.g. 14). + static unsigned getRegisterNumbering(unsigned RegEnum); /// Code Generation virtual methods... + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, int FrameIndex, @@ -43,9 +61,8 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo { unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *RC) const; - virtual MachineInstr* foldMemoryOperand(MachineInstr* MI, - unsigned OpNum, - int FrameIndex) const; + MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum, + int FrameIndex) const; const unsigned *getCalleeSavedRegs() const; @@ -57,7 +74,7 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator II) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 24f53d9331..51d52c6971 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs ----------*- tablegen -*-===// +//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,129 +13,169 @@ //===----------------------------------------------------------------------===// // Registers are identified with 4-bit ID numbers. 
-class ARMReg<string n> : Register<n> { +class ARMReg<bits<4> num, string n, list<Register> aliases = []> : Register<n> { + field bits<4> Num; let Namespace = "ARM"; + let Aliases = aliases; } -// Ri - 32-bit integer registers -class Ri<bits<4> num, string n> : ARMReg<n> { - field bits<4> Num; - let Num = num; -} -// Rf - 32-bit floating-point registers -class Rf<bits<5> num, string n> : ARMReg<n> { - field bits<5> Num; - let Num = num; -} -// Rd - Slots in the FP register file for 64-bit floating-point values. -class Rd<bits<5> num, string n, list<Register> aliases> : ARMReg<n> { +class ARMFReg<bits<5> num, string n> : Register<n> { field bits<5> Num; - let Num = num; - let Aliases = aliases; + let Namespace = "ARM"; } // Integer registers -def R0 : Ri< 0, "R0">, DwarfRegNum<0>; -def R1 : Ri< 1, "R1">, DwarfRegNum<1>; -def R2 : Ri< 2, "R2">, DwarfRegNum<2>; -def R3 : Ri< 3, "R3">, DwarfRegNum<3>; -def R4 : Ri< 4, "R4">, DwarfRegNum<4>; -def R5 : Ri< 5, "R5">, DwarfRegNum<5>; -def R6 : Ri< 6, "R6">, DwarfRegNum<6>; -def R7 : Ri< 7, "R7">, DwarfRegNum<7>; -def R8 : Ri< 8, "R8">, DwarfRegNum<8>; -def R9 : Ri< 9, "R9">, DwarfRegNum<9>; -def R10 : Ri<10, "R10">, DwarfRegNum<10>; -def R11 : Ri<11, "R11">, DwarfRegNum<11>; -def R12 : Ri<12, "R12">, DwarfRegNum<12>; -def R13 : Ri<13, "R13">, DwarfRegNum<13>; -def R14 : Ri<14, "R14">, DwarfRegNum<14>; -def R15 : Ri<15, "R15">, DwarfRegNum<15>; +def R0 : ARMReg< 0, "r0">, DwarfRegNum<0>; +def R1 : ARMReg< 1, "r1">, DwarfRegNum<1>; +def R2 : ARMReg< 2, "r2">, DwarfRegNum<2>; +def R3 : ARMReg< 3, "r3">, DwarfRegNum<3>; +def R4 : ARMReg< 4, "r4">, DwarfRegNum<4>; +def R5 : ARMReg< 5, "r5">, DwarfRegNum<5>; +def R6 : ARMReg< 6, "r6">, DwarfRegNum<6>; +def R7 : ARMReg< 7, "r7">, DwarfRegNum<7>; +def R8 : ARMReg< 8, "r8">, DwarfRegNum<8>; +def R9 : ARMReg< 9, "r9">, DwarfRegNum<9>; +def R10 : ARMReg<10, "r10">, DwarfRegNum<10>; +def R11 : ARMReg<11, "r11">, DwarfRegNum<11>; +def R12 : ARMReg<12, "r12">, DwarfRegNum<12>; +def SP : ARMReg<13, "sp">, DwarfRegNum<13>; +def LR : ARMReg<14, "lr">, DwarfRegNum<14>; +def PC : ARMReg<15, "pc">, DwarfRegNum<15>; -// TODO: update to VFP-v3 -// Floating-point registers -def S0 : Rf< 0, "S0">, DwarfRegNum<64>; -def S1 : Rf< 1, "S1">, DwarfRegNum<65>; -def S2 : Rf< 2, "S2">, DwarfRegNum<66>; -def S3 : Rf< 3, "S3">, DwarfRegNum<67>; -def S4 : Rf< 4, "S4">, DwarfRegNum<68>; -def S5 : Rf< 5, "S5">, DwarfRegNum<69>; -def S6 : Rf< 6, "S6">, DwarfRegNum<70>; -def S7 : Rf< 7, "S7">, DwarfRegNum<71>; -def S8 : Rf< 8, "S8">, DwarfRegNum<72>; -def S9 : Rf< 9, "S9">, DwarfRegNum<73>; -def S10 : Rf<10, "S10">, DwarfRegNum<74>; -def S11 : Rf<11, "S11">, DwarfRegNum<75>; -def S12 : Rf<12, "S12">, DwarfRegNum<76>; -def S13 : Rf<13, "S13">, DwarfRegNum<77>; -def S14 : Rf<14, "S14">, DwarfRegNum<78>; -def S15 : Rf<15, "S15">, DwarfRegNum<79>; -def S16 : Rf<16, "S16">, DwarfRegNum<80>; -def S17 : Rf<17, "S17">, DwarfRegNum<81>; -def S18 : Rf<18, "S18">, DwarfRegNum<82>; -def S19 : Rf<19, "S19">, DwarfRegNum<83>; -def S20 : Rf<20, "S20">, DwarfRegNum<84>; -def S21 : Rf<21, "S21">, DwarfRegNum<85>; -def S22 : Rf<22, "S22">, DwarfRegNum<86>; -def S23 : Rf<23, "S23">, DwarfRegNum<87>; -def S24 : Rf<24, "S24">, DwarfRegNum<88>; -def S25 : Rf<25, "S25">, DwarfRegNum<89>; -def S26 : Rf<26, "S26">, DwarfRegNum<90>; -def S27 : Rf<27, "S27">, DwarfRegNum<91>; -def S28 : Rf<28, "S28">, DwarfRegNum<92>; -def S29 : Rf<29, "S29">, DwarfRegNum<93>; -def S30 : Rf<30, "S30">, DwarfRegNum<94>; -def S31 : Rf<31, "S31">, DwarfRegNum<95>; +// Float 
registers +def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; +def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">; +def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">; +def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">; +def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">; +def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">; +def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">; +def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">; +def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">; +def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">; +def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">; +def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">; +def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; +def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; +def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; +def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; -// Aliases of the S* registers used to hold 64-bit fp values (doubles) -def D0 : Rd< 0, "D0", [S0, S1]>, DwarfRegNum<64>; -def D1 : Rd< 2, "D1", [S2, S3]>, DwarfRegNum<66>; -def D2 : Rd< 4, "D2", [S4, S5]>, DwarfRegNum<68>; -def D3 : Rd< 6, "D3", [S6, S7]>, DwarfRegNum<70>; -def D4 : Rd< 8, "D4", [S8, S9]>, DwarfRegNum<72>; -def D5 : Rd<10, "D5", [S10, S11]>, DwarfRegNum<74>; -def D6 : Rd<12, "D6", [S12, S13]>, DwarfRegNum<76>; -def D7 : Rd<14, "D7", [S14, S15]>, DwarfRegNum<78>; -def D8 : Rd<16, "D8", [S16, S17]>, DwarfRegNum<80>; -def D9 : Rd<18, "D9", [S18, S19]>, DwarfRegNum<82>; -def D10 : Rd<20, "D10", [S20, S21]>, DwarfRegNum<84>; -def D11 : Rd<22, "D11", [S22, S23]>, DwarfRegNum<86>; -def D12 : Rd<24, "D12", [S24, S25]>, DwarfRegNum<88>; -def D13 : Rd<26, "D13", [S26, S27]>, DwarfRegNum<90>; -def D14 : Rd<28, "D14", [S28, S29]>, DwarfRegNum<92>; -def D15 : Rd<30, "D15", [S30, S31]>, DwarfRegNum<94>; +// Aliases of the F* registers used to hold 64-bit fp values (doubles) +def D0 : ARMReg< 0, "d0", [S0, S1]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>; +def D9 : ARMReg< 9, "d9", [S18, S19]>; +def D10 : ARMReg<10, "d10", [S20, S21]>; +def D11 : ARMReg<11, "d11", [S22, S23]>; +def D12 : ARMReg<12, "d12", [S24, S25]>; +def D13 : ARMReg<13, "d13", [S26, S27]>; +def D14 : ARMReg<14, "d14", [S28, S29]>; +def D15 : ARMReg<15, "d15", [S30, S31]>; // Register classes. // -// FIXME: the register order should be defined in terms of the preferred -// allocation order... +// pc == Program Counter +// lr == Link Register +// sp == Stack Pointer +// r12 == ip (scratch) +// r7 == Frame Pointer (thumb-style backtraces) +// r11 == Frame Pointer (arm-style backtraces) +// r10 == Stack Limit // -def IntRegs : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, - R7, R8, R9, R10, R11, R12, - R13, R14, R15]> { +def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R12, R11, + LR, SP, PC]> { let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; + // FIXME: We are reserving r12 in case the PEI needs to use it to + // generate large stack offset. Make it available once we have register + // scavenging. 
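  // For illustration (not from this patch): a frame index whose offset does
  // not fit in the load/store immediate field must be materialized through a
  // scratch register after allocation, e.g.
  //     add ip, sp, #65536       @ the part of the offset that encodes
  //     ldr r0, [ip, #132]       @ the remainder fits the 12-bit field
  // which is why ip (r12) is withheld until a scavenger can find a free reg.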
  let MethodBodies = [{
-    IntRegsClass::iterator
-    IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
-      // r15 == Program Counter
-      // r14 == Link Register
-      // r13 == Stack Pointer
-      // r12 == ip (scratch)
-      // r11 == Frame Pointer
-      // r10 == Stack Limit
-      if (hasFP(MF))
-        return end() - 5;
-      else
-        return end() - 4;
+    // FP is R11, R9 is available.
+    static const unsigned ARM_GPR_AO_1[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R9, ARM::R10,
+      ARM::LR, ARM::R11 };
+    // FP is R11, R9 is not available.
+    static const unsigned ARM_GPR_AO_2[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+      ARM::R8, ARM::R10,
+      ARM::LR, ARM::R11 };
+    // FP is R7, R9 is available.
+    static const unsigned ARM_GPR_AO_3[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+      ARM::R9, ARM::R10,ARM::R11,
+      ARM::LR, ARM::R7 };
+    // FP is R7, R9 is not available.
+    static const unsigned ARM_GPR_AO_4[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+      ARM::R10,ARM::R11,
+      ARM::LR, ARM::R7 };
+    // FP is R7, only low registers available.
+    static const unsigned THUMB_GPR_AO[] = {
+      ARM::R0, ARM::R1, ARM::R2,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+    GPRClass::iterator
+    GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      if (Subtarget.isThumb())
+        return THUMB_GPR_AO;
+      if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_4;
+        else
+          return ARM_GPR_AO_3;
+      } else {
+        if (Subtarget.isR9Reserved())
+          return ARM_GPR_AO_2;
+        else
+          return ARM_GPR_AO_1;
+      }
+    }
+
+    GPRClass::iterator
+    GPRClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+      GPRClass::iterator I;
+      if (Subtarget.isThumb())
+        I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+      else if (Subtarget.useThumbBacktraces()) {
+        if (Subtarget.isR9Reserved())
+          I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+        else
+          I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+      } else {
+        if (Subtarget.isR9Reserved())
+          I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+        else
+          I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+      }
+
+      return hasFP(MF) ? I-1 : I;
+    }
  }];
}

-def FPRegs : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                    S9, S10, S11, S12, S13, S14, S15, S16, S17,
                                    S18, S19, S20, S21, S22, S23, S24, S25, S26,
                                    S27, S28, S29, S30, S31]>;

-def DFPRegs : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
-                                    D8, D9, D10, D11, D12, D13, D14, D15]>;
+// ARM requires only word alignment for double. It's more performant if it
+// is double-word aligned, though.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+                                           D9, D10, D11, D12, D13, D14, D15]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000000..35bb9accc3
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,52 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// FIXME: this is temporary.
+static cl::opt<bool> Thumb("enable-thumb",
+                           cl::desc("Switch to thumb mode in ARM backend"));
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS)
+  : ARMArchVersion(V4T), HasVFP2(false), IsDarwin(false),
+    UseThumbBacktraces(false), IsR9Reserved(false), stackAlignment(8) {
+
+  // Determine default and user specified characteristics
+  std::string CPU = "generic";
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+
+  IsThumb = Thumb;
+
+  // Set the boolean corresponding to the current target triple, or the default
+  // if one cannot be determined, to true.
+  const std::string& TT = M.getTargetTriple();
+  if (TT.length() > 5) {
+    IsDarwin = TT.find("-darwin") != std::string::npos;
+  } else if (TT.empty()) {
+#if defined(__APPLE__)
+    IsDarwin = true;
+#endif
+  }
+
+  if (IsDarwin) {
+    UseThumbBacktraces = true;
+    IsR9Reserved = true;
+    stackAlignment = 4;
+  }
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000000..d5c4b5eb03
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,82 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+  enum ARMArchEnum {
+    V4T, V5T, V5TE, V6
+  };
+
+  /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+  /// and V6.
+  ARMArchEnum ARMArchVersion;
+
+  /// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
+  /// instructions.
+  bool HasVFP2;
+
+  /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+  bool IsThumb;
+
+  bool IsDarwin;
+
+  /// UseThumbBacktraces - True if we use thumb style backtraces.
+  bool UseThumbBacktraces;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved; + + /// stackAlignment - The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + unsigned stackAlignment; + + public: + /// This constructor initializes the data members to match that + /// of the specified module. + /// + ARMSubtarget(const Module &M, const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + + bool hasV4TOps() const { return ARMArchVersion >= V4T; } + bool hasV5TOps() const { return ARMArchVersion >= V5T; } + bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } + bool hasV6Ops() const { return ARMArchVersion >= V6; } + + bool hasVFP2() const { return HasVFP2; } + + bool isDarwin() const { return IsDarwin; } + bool isThumb() const { return IsThumb; } + + bool useThumbBacktraces() const { return UseThumbBacktraces; } + bool isR9Reserved() const { return IsR9Reserved; } + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by every + /// function for this subtarget. + unsigned getStackAlignment() const { return stackAlignment; } +}; +} // End llvm namespace + +#endif // ARMSUBTARGET_H diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp index a1c623c85a..65848e5376 100644 --- a/lib/Target/ARM/ARMTargetAsmInfo.cpp +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -12,18 +12,50 @@ //===----------------------------------------------------------------------===// #include "ARMTargetAsmInfo.h" - +#include "ARMTargetMachine.h" using namespace llvm; ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) { - Data16bitsDirective = "\t.half\t"; - Data32bitsDirective = "\t.word\t"; + const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); + if (Subtarget->isDarwin()) { + HasDotTypeDotSizeDirective = false; + PrivateGlobalPrefix = "L"; + GlobalPrefix = "_"; + ZeroDirective = "\t.space\t"; + SetDirective = "\t.set"; + WeakRefDirective = "\t.weak_reference\t"; + JumpTableDataSection = ".const"; + CStringSection = "\t.cstring"; + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + InlineAsmStart = "@ InlineAsm Start"; + InlineAsmEnd = "@ InlineAsm End"; + LCOMMDirective = "\t.lcomm\t"; + COMMDirectiveTakesAlignment = false; + + NeedsSet = true; + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; + } else { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + ZeroDirective = "\t.skip\t"; + WeakRefDirective = "\t.weak\t"; + StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; + 
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; + } + AlignmentIsInBytes = false; Data64bitsDirective = 0; - ZeroDirective = "\t.skip\t"; CommentString = "@"; + DataSection = "\t.data"; ConstantPoolSection = "\t.text\n"; - AlignmentIsInBytes = false; - WeakRefDirective = "\t.weak\t"; - StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; - StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4328ca8465..195689e059 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,30 +11,32 @@ // //===----------------------------------------------------------------------===// -#include "ARMTargetAsmInfo.h" #include "ARMTargetMachine.h" +#include "ARMTargetAsmInfo.h" #include "ARMFrameInfo.h" #include "ARM.h" #include "llvm/Module.h" #include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; +static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, + cl::desc("Disable load store optimization pass")); + namespace { // Register the target. RegisterTarget<ARMTargetMachine> X("arm", " ARM"); } - -const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const { - return new ARMTargetAsmInfo(*this); -} - - /// TargetMachine ctor - Create an ILP32 architecture model /// ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS) - : DataLayout("e-p:32:32") { + : Subtarget(M, FS), DataLayout("e-p:32:32-d:32"), InstrInfo(Subtarget), + FrameInfo(Subtarget) { + if (Subtarget.isDarwin()) + NoFramePointerElim = true; } unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) { @@ -49,14 +51,23 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) { } +const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const { + return new ARMTargetAsmInfo(*this); +} + + // Pass Pipeline Configuration bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) { PM.add(createARMISelDag(*this)); return false; } -bool ARMTargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) { - PM.add(createARMFixMulPass()); +bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) { + // FIXME: temporarily disabling load / store optimization pass for Thumb mode. 
+ if (!Fast && !DisableLdStOpti && !Subtarget.isThumb()) + PM.add(createARMLoadStoreOptimizationPass()); + + PM.add(createARMConstantIslandPass()); return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index b9a3d9809d..9c888ea395 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -20,19 +20,17 @@ #include "llvm/Target/TargetFrameInfo.h" #include "ARMInstrInfo.h" #include "ARMFrameInfo.h" +#include "ARMSubtarget.h" namespace llvm { class Module; class ARMTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; // Calculates type size & alignment - ARMInstrInfo InstrInfo; - ARMFrameInfo FrameInfo; - -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; public: ARMTargetMachine(const Module &M, const std::string &FS); @@ -42,11 +40,14 @@ public: return &InstrInfo.getRegisterInfo(); } virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } static unsigned getModuleMatchQuality(const Module &M); + virtual const TargetAsmInfo *createTargetAsmInfo() const; + // Pass Pipeline Configuration virtual bool addInstSelector(FunctionPassManager &PM, bool Fast); - virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast); + virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast); virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast, std::ostream &Out); }; diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 4310b8c8e4..77300a1480 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -7,6 +7,7 @@ # License. See LICENSE.TXT for details. # ##===----------------------------------------------------------------------===## + LEVEL = ../../.. LIBRARYNAME = LLVMARM TARGET = ARM @@ -15,7 +16,6 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ - ARMGenDAGISel.inc + ARMGenDAGISel.inc ARMGenSubtarget.inc include $(LEVEL)/Makefile.common - diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt new file mode 100644 index 0000000000..e9e943d0ae --- /dev/null +++ b/lib/Target/ARM/README-Thumb.txt @@ -0,0 +1,17 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb specific). +//===---------------------------------------------------------------------===// + +* Add support for compiling functions in both ARM and Thumb mode, then taking + the smallest. +* Add support for compiling individual basic blocks in thumb mode, when in a + larger ARM function. This can be used for presumed cold code, like paths + to abort (failure path of asserts), EH handling code, etc. + +* Thumb doesn't have normal pre/post increment addressing modes, but you can + load/store 32-bit integers with pre/postinc by using load/store multiple + instrs with a single register. + +* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add + and cmp instructions can use high registers. Also, we can use them as + temporaries to spill values into. 
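A concrete illustration of the pre/post-increment bullet above (a sketch in C, not code from the tree): each word copy below is a candidate for a single-register load/store multiple on Thumb, which updates the base pointer as a side effect.

void copyWords(unsigned *Dst, const unsigned *Src, int N) {
  while (N-- > 0)
    *Dst++ = *Src++;    /* ldmia rS!, {rT} then stmia rD!, {rT}:
                           one instruction each, with base writeback */
}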
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 0f79322b63..000e8e6450 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -2,69 +2,438 @@ // Random ideas for the ARM backend. //===---------------------------------------------------------------------===// -Consider implementing a select with two conditional moves: +Reimplement 'select' in terms of 'SEL'. -cmp x, y -moveq dst, a -movne dst, b +* We would really like to support UXTAB16, but we need to prove that the + add doesn't need to overflow between the two 16-bit chunks. ----------------------------------------------------------- +* implement predication support +* Implement pre/post increment support. (e.g. PR935) +* Coalesce stack slots! +* Implement smarter constant generation for binops with large immediates. +* Consider materializing FP constants like 0.0f and 1.0f using integer + immediate instructions then copy to FPU. Slower than load into FPU? -%tmp1 = shl int %b, ubyte %c -%tmp4 = add int %a, %tmp1 +//===---------------------------------------------------------------------===// + +The constant island pass is extremely naive. If a constant pool entry is +out of range, it *always* splits a block and inserts a copy of the cp +entry inline. It should: + +1. Check to see if there is already a copy of this constant nearby. If so, + reuse it. +2. Instead of always splitting blocks to insert the constant, insert it in + nearby 'water'. +3. Constant island references should be ref counted. If a constant reference + is out-of-range, and the last reference to a constant is relocated, the + dead constant should be removed. + +This pass has all the framework needed to implement this, but it hasn't +been done. + +//===---------------------------------------------------------------------===// + +We need to start generating predicated instructions. The .td files have a way +to express this now (see the PPC conditional return instruction), but the +branch folding pass (or a new if-cvt pass) should start producing these, at +least in the trivial case. + +Among the obvious wins, doing so can eliminate the need to custom expand +copysign (i.e. we won't need to custom expand it to get the conditional +negate). + +//===---------------------------------------------------------------------===// + +Implement long long "X-3" with instructions that fold the immediate in. These +were disabled due to badness with the ARM carry flag on subtracts. + +//===---------------------------------------------------------------------===// + +We currently compile abs: +int foo(int p) { return p < 0 ? -p : p; } + +into: + +_foo: + rsb r1, r0, #0 + cmn r0, #1 + movgt r1, r0 + mov r0, r1 + bx lr + +This is very, uh, literal. This could be a 3 operation sequence: + t = (p sra 31); + res = (p xor t)-t + +Which would be better. This occurs in png decode. + +//===---------------------------------------------------------------------===// + +More load / store optimizations: +1) Look past instructions without side-effects (not load, store, branch, etc.) + when forming the list of loads / stores to optimize. + +2) Smarter register allocation? +We are probably missing some opportunities to use ldm / stm. Consider: + +ldr r5, [r0] +ldr r4, [r0, #4] + +This cannot be merged into a ldm. Perhaps we will need to do the transformation +before register allocation. Then teach the register allocator to allocate a +chunk of consecutive registers. + +3) Better representation for block transfer? 
This is from Olden/power:

-compiles to
+	fldd d0, [r4]
+	fstd d0, [r4, #+32]
+	fldd d0, [r4, #+8]
+	fstd d0, [r4, #+40]
+	fldd d0, [r4, #+16]
+	fstd d0, [r4, #+48]
+	fldd d0, [r4, #+24]
+	fstd d0, [r4, #+56]

-add r0, r0, r1, lsl r2
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.

-but
+4) Can we recognize the relative position of constantpool entries? i.e. Treat

-%tmp1 = shl int %b, ubyte %c
-%tmp4 = add int %tmp1, %a
+	ldr r0, LCPI17_3
+	ldr r1, LCPI17_4
+	ldr r2, LCPI17_5

-compiles to
-mov r1, r1, lsl r2
-add r0, r1, r0
+   as
+	ldr r0, LCPI17
+	ldr r1, LCPI17+4
+	ldr r2, LCPI17+8

----------------------------------------------------------
-%tmp1 = shl int %b, ubyte 4
-%tmp2 = add int %a, %tmp1
+	Then the ldr's can be combined into a single ldm. See Olden/power.

-compiles to
+Note that for ARM v4 gcc uses ldmia to load a pair of 32-bit values to
+represent a double 64-bit FP constant:

-mov r2, #4
-add r0, r0, r1, lsl r2
+	adr r0, L6
+	ldmia r0, {r0-r1}

-should be
+	.align 2
+L6:
+	.long -858993459
+	.long 1074318540

-add r0, r0, r1, lsl #4
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd / strd if there are only two destination registers that form an
+odd/even pair. However, we probably would pay a penalty if the address is not
+aligned on 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.

-----------------------------------------------------------
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+  double r = foo(3.1);
+  return x+r;
+}
+
+_bar:
+	sub sp, sp, #16
+	str r4, [sp, #+12]
+	str r5, [sp, #+8]
+	str lr, [sp, #+4]
+	mov r4, r0
+	mov r5, r1
+	ldr r0, LCPI2_0
+	bl _foo
+	fmsr f0, r0
+	fcvtsd d0, f0
+	fmdrr d1, r4, r5
+	faddd d0, d0, d1
+	fmrrd r0, r1, d0
+	ldr lr, [sp, #+4]
+	ldr r5, [sp, #+8]
+	ldr r4, [sp, #+12]
+	add sp, sp, #16
+	bx lr
+
+Ignore the prologue and epilogue stuff for a second. Note
+	mov r4, r0
+	mov r5, r1
+the copies to callee-save registers and the fact that they are only being used
+by the fmdrr instruction. It would have been better had the fmdrr been
+scheduled before the call, placing the result in a callee-save DPR register.
+The two mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+  double d1;
+  int s1;
+};
+
+void foo(struct s S) {
+  printf("%g, %d\n", S.d1, S.s1);
+}

-add an offset to FLDS/FLDD/FSTD/FSTS addressing mode
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):

-----------------------------------------------------------
+	stmfd sp!, {r7, lr}
+	add r7, sp, #0
+	sub sp, sp, #12
+	stmia sp, {r0, r1, r2}
+	ldmia sp, {r1-r2}
+	ldr r0, L5
+	ldr r3, [sp, #8]
+L2:
+	add r0, pc, r0
+	bl L_printf$stub

-the function
+Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?

-void %f() {
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+  struct s S = {1.1, 2};
+  return S;
+}
+
+	mov ip, r0
+	ldr r0, L5
+	sub sp, sp, #12
+L2:
+	add r0, pc, r0
+	@ lr needed for prologue
+	ldmia r0, {r0, r1, r2}
+	stmia sp, {r0, r1, r2}
+	stmia ip, {r0, r1, r2}
+	mov r0, ip
+	add sp, sp, #12
+	bx lr
+
+r0 (and later ip) is the hidden parameter from the caller: the address to store
+the return value in. The first ldmia loads the constants into r0, r1, r2. The
+last stmia stores r0, r1, r2 into the address passed in. However, there is one
+additional stmia that stores r0, r1, and r2 to some stack location. The store
+is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
entry:
-	call void %g( int 1, int 2, int 3, int 4, int 5 )
+	%S = alloca %struct.s, align 4		; <%struct.s*> [#uses=1]
+	%memtmp = alloca %struct.s		; <%struct.s*> [#uses=1]
+	cast %struct.s* %S to sbyte*		; <sbyte*>:0 [#uses=2]
+	call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+	cast %struct.s* %agg.result to sbyte*		; <sbyte*>:1 [#uses=2]
+	call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+	cast %struct.s* %memtmp to sbyte*		; <sbyte*>:2 [#uses=1]
+	call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
 	ret void
 }

-declare void %g(int, int, int, int, int)
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.

-Only needs 8 bytes of stack space. We currently allocate 16.
+A feasible temporary solution is to use specific physical registers at
+lowering time for small (<= 4 words?) transfer sizes.

-----------------------------------------------------------
+* The ARM CSRet calling convention requires the hidden argument to be returned
+by the callee.

-32 x 32 -> 64 multiplications currently uses two instructions. We
-should try to declare smull and umull as returning two values.
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm-generated assembly code that looks like this:
+
+LBB3:
+	...
+LBB4:
+...
+	beq LBB3
+	b LBB2

-----------------------------------------------------------
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//

-Implement addressing modes 2 (ldrb) and 3 (ldrsb)
+We need register scavenging. Currently, the 'ip' register is reserved in case
+frame indexes are too big.
This means that we generate
+extra code for stuff like this:

-----------------------------------------------------------
+void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) {
+  short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
+  *a = x * Rconst;
+  *b = y * Rconst;
+  *c = z * Rconst;
+}
+
+we compile it to:
+
+_foo:
+*** stmfd sp!, {r4, r7}
+*** add r7, sp, #4
+	mov r4, #186
+	orr r4, r4, #89, 24 @ 22784
+	mul r0, r0, r4
+	str r0, [r3]
+	mul r0, r1, r4
+	ldr r1, [sp, #+8]
+	str r0, [r1]
+	mul r0, r2, r4
+	ldr r1, [sp, #+12]
+	str r0, [r1]
+*** sub sp, r7, #4
+*** ldmfd sp!, {r4, r7}
+	bx lr
+
+GCC produces:
+
+_foo:
+	ldr ip, L4
+	mul r0, ip, r0
+	mul r1, ip, r1
+	str r0, [r3, #0]
+	ldr r3, [sp, #0]
+	mul r2, ip, r2
+	str r1, [r3, #0]
+	ldr r3, [sp, #4]
+	str r2, [r3, #0]
+	bx lr
+L4:
+	.long 22970
+
+This is apparently all because we couldn't use ip here.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base
+ptr is guaranteed to be live beyond the load/store. This can happen if the
+base ptr is live out of the block in which we are performing the optimization.
+e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact the performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable at DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub +
+   load/store to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we add support for multiple result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in the
+prologue and epilogue if the inc / dec value is too large to fit in a single
+immediate operand. In some cases, perhaps it might be better to load the value
+from a constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+ +int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) { + int i = 0; + + if (StackPtr != 0) { + while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768))) + Line[i++] = Stack[--StackPtr]; + if (LineLen > 32768) + { + while (StackPtr != 0 && i < LineLen) + { + i++; + --StackPtr; + } + } + } + return StackPtr; +} + +//===---------------------------------------------------------------------===// + +This should compile to the mlas instruction: +int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; } + +//===---------------------------------------------------------------------===// + +At some point, we should triage these to see if they still apply to us: + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982 + +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702 +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663 + +http://www.inf.u-szeged.hu/gcc-arm/ +http://citeseer.ist.psu.edu/debus04linktime.html + +//===---------------------------------------------------------------------===// |
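For the mlas item above, the hoped-for code is roughly the following hand-written sketch (assumptions: MLAS sets the N flag from the accumulate result, and Rd must differ from Rm on these cores, hence the r3 scratch; this is not compiler output):

_mlas:
	mlas r3, r0, r1, r2	@ r3 = x * y + z, setting N/Z
	mov r0, #13
	movmi r0, #7		@ a negative result selects 7
	bx lr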