Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.h                        98
-rw-r--r--  lib/Target/ARM/ARM.td                       77
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h        394
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp           858
-rw-r--r--  lib/Target/ARM/ARMCommon.cpp                84
-rw-r--r--  lib/Target/ARM/ARMCommon.h                  22
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp   490
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp     55
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h       50
-rw-r--r--  lib/Target/ARM/ARMFrameInfo.h               10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp        1436
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp        1414
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h           134
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp            405
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h               67
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td            1357
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td            513
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td              359
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp   628
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h    136
-rw-r--r--  lib/Target/ARM/ARMMul.cpp                   75
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp        1028
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h            33
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td          246
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp             52
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h               82
-rw-r--r--  lib/Target/ARM/ARMTargetAsmInfo.cpp         48
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp         31
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h           17
-rw-r--r--  lib/Target/ARM/Makefile                      4
-rw-r--r--  lib/Target/ARM/README-Thumb.txt             17
-rw-r--r--  lib/Target/ARM/README.txt                  443
32 files changed, 8695 insertions, 1968 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index db270739ea..1d626d1c88 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -20,43 +20,77 @@
#include <cassert>
namespace llvm {
- // Enums corresponding to ARM condition codes
- namespace ARMCC {
- enum CondCodes {
- EQ,
- NE,
- CS,
- CC,
- MI,
- PL,
- VS,
- VC,
- HI,
- LS,
- GE,
- LT,
- GT,
- LE,
- AL
- };
+
+class ARMTargetMachine;
+class FunctionPass;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+ enum CondCodes {
+ EQ,
+ NE,
+ HS,
+ LO,
+ MI,
+ PL,
+ VS,
+ VC,
+ HI,
+ LS,
+ GE,
+ LT,
+ GT,
+ LE,
+ AL
+ };
+
+ inline static CondCodes getOppositeCondition(CondCodes CC){
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
}
+}
- namespace ARMShift {
- enum ShiftTypes {
- LSL,
- LSR,
- ASR,
- ROR,
- RRX
- };
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code");
+ case ARMCC::EQ: return "eq";
+ case ARMCC::NE: return "ne";
+ case ARMCC::HS: return "hs";
+ case ARMCC::LO: return "lo";
+ case ARMCC::MI: return "mi";
+ case ARMCC::PL: return "pl";
+ case ARMCC::VS: return "vs";
+ case ARMCC::VC: return "vc";
+ case ARMCC::HI: return "hi";
+ case ARMCC::LS: return "ls";
+ case ARMCC::GE: return "ge";
+ case ARMCC::LT: return "lt";
+ case ARMCC::GT: return "gt";
+ case ARMCC::LE: return "le";
+ case ARMCC::AL: return "al";
}
+}
- class FunctionPass;
- class TargetMachine;
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(std::ostream &O, ARMTargetMachine &TM);
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
- FunctionPass *createARMISelDag(TargetMachine &TM);
- FunctionPass *createARMCodePrinterPass(std::ostream &OS, TargetMachine &TM);
- FunctionPass *createARMFixMulPass();
} // end namespace llvm;
// Defines symbolic names for ARM registers. This defines a mapping from
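
The condition-code helpers that replace the old ARMCC/ARMShift enums now live in ARM.h itself, so any pass can invert a condition and print its suffix. A minimal sketch of how they combine (the reverseBranchCond() wrapper is hypothetical; only the ARMCC API comes from the header above):

    #include "ARM.h"
    #include <iostream>
    using namespace llvm;

    // Invert the condition guarding a branch, e.g. when swapping the taken
    // and fall-through successors. AL has no opposite and hits the assert in
    // getOppositeCondition(), so unconditional branches need special casing.
    static const char *reverseBranchCond(ARMCC::CondCodes CC) {
      return ARMCondCodeToString(ARMCC::getOppositeCondition(CC));
    }

    int main() {
      std::cout << reverseBranchCond(ARMCC::LT) << "\n"; // prints "ge"
      std::cout << reverseBranchCond(ARMCC::HS) << "\n"; // prints "lo"
      return 0;
    }
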
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 40153493d8..6faf938fca 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -18,6 +18,73 @@
include "../Target.td"
//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
+//
+
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFP2", "true",
+                                   "Enable VFP2 instructions">;
+
+//===----------------------------------------------------------------------===//
+// ARM Processors supported.
+//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+// V4 Processors.
+def : Proc<"generic", []>;
+def : Proc<"arm8", []>;
+def : Proc<"arm810", []>;
+def : Proc<"strongarm", []>;
+def : Proc<"strongarm110", []>;
+def : Proc<"strongarm1100", []>;
+def : Proc<"strongarm1110", []>;
+
+// V4T Processors.
+def : Proc<"arm7tdmi", [ArchV4T]>;
+def : Proc<"arm7tdmi-s", [ArchV4T]>;
+def : Proc<"arm710t", [ArchV4T]>;
+def : Proc<"arm720t", [ArchV4T]>;
+def : Proc<"arm9", [ArchV4T]>;
+def : Proc<"arm9tdmi", [ArchV4T]>;
+def : Proc<"arm920", [ArchV4T]>;
+def : Proc<"arm920t", [ArchV4T]>;
+def : Proc<"arm922t", [ArchV4T]>;
+def : Proc<"arm940t", [ArchV4T]>;
+def : Proc<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : Proc<"arm10tdmi", [ArchV5T]>;
+def : Proc<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : Proc<"arm9e", [ArchV5TE]>;
+def : Proc<"arm946e-s", [ArchV5TE]>;
+def : Proc<"arm966e-s", [ArchV5TE]>;
+def : Proc<"arm968e-s", [ArchV5TE]>;
+def : Proc<"arm10e", [ArchV5TE]>;
+def : Proc<"arm1020e", [ArchV5TE]>;
+def : Proc<"arm1022e", [ArchV5TE]>;
+def : Proc<"xscale", [ArchV5TE]>;
+def : Proc<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Proc<"arm1136j-s", [ArchV6]>;
+def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"arm1176jz-s", [ArchV6]>;
+def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
+def : Proc<"mpcorenovfp", [ArchV6]>;
+def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -31,8 +98,14 @@ include "ARMInstrInfo.td"
def ARMInstrInfo : InstrInfo {
// Define how we want to layout our target-specific information field.
- let TSFlagsFields = [];
- let TSFlagsShifts = [];
+ let TSFlagsFields = ["AddrModeBits",
+ "SizeFlag",
+ "IndexModeBits",
+ "Opcode"];
+ let TSFlagsShifts = [0,
+ 4,
+ 7,
+ 9];
}
//===----------------------------------------------------------------------===//
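
The TSFlagsFields/TSFlagsShifts change packs four target-specific fields into each instruction's TSFlags word: AddrModeBits at bit 0, SizeFlag at bit 4, IndexModeBits at bit 7, and Opcode from bit 9 upward. A sketch of the implied decode (field widths are inferred from the consecutive shifts [0, 4, 7, 9]; the accessor names are illustrative, not the real ARMII constants):

    // Illustrative decode of the packed TSFlags layout declared above.
    static inline unsigned getAddrMode(unsigned TSFlags)   { return TSFlags & 0xF; }         // bits 0-3
    static inline unsigned getSizeFlag(unsigned TSFlags)   { return (TSFlags >> 4) & 0x7; }  // bits 4-6
    static inline unsigned getIndexMode(unsigned TSFlags)  { return (TSFlags >> 7) & 0x3; }  // bits 7-8
    static inline unsigned getFormOpcode(unsigned TSFlags) { return TSFlags >> 9; }          // bit 9 up
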
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
new file mode 100644
index 0000000000..3f47a69471
--- /dev/null
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -0,0 +1,394 @@
+//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the ARM addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// ARM_AM - ARM Addressing Mode Stuff
+namespace ARM_AM {
+ enum ShiftOpc {
+ no_shift = 0,
+ asr,
+ lsl,
+ lsr,
+ ror,
+ rrx
+ };
+
+ enum AddrOpc {
+ add = '+', sub = '-'
+ };
+
+ static inline const char *getShiftOpcStr(ShiftOpc Op) {
+ switch (Op) {
+ default: assert(0 && "Unknown shift opc!");
+ case ARM_AM::asr: return "asr";
+ case ARM_AM::lsl: return "lsl";
+ case ARM_AM::lsr: return "lsr";
+ case ARM_AM::ror: return "ror";
+ case ARM_AM::rrx: return "rrx";
+ }
+ }
+
+ static inline ShiftOpc getShiftOpcForNode(SDOperand N) {
+ switch (N.getOpcode()) {
+ default: return ARM_AM::no_shift;
+ case ISD::SHL: return ARM_AM::lsl;
+ case ISD::SRL: return ARM_AM::lsr;
+ case ISD::SRA: return ARM_AM::asr;
+ case ISD::ROTR: return ARM_AM::ror;
+ //case ISD::ROTL: // Only if imm -> turn into ROTR.
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return isLD ? "fd" : "ea";
+ case ARM_AM::ib: return isLD ? "ed" : "fa";
+ case ARM_AM::da: return isLD ? "fa" : "ed";
+ case ARM_AM::db: return isLD ? "ea" : "fd";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+  // This is stored as three operands [rega, regb, opc].  The first is the base
+  // register; the second is the shift-amount register (or reg0 when the shift
+  // amount is an immediate or absent).  The third operand encodes the shift
+  // opcode and the immediate when no shift register is present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+  /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+  /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should skip the first run of ones, then
+ // retry the hunt.
+ if (Imm & 1) {
+ unsigned TrailingOnes = CountTrailingZeros_32(~Imm);
+ if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF
+        // Restart the search for a high-order bit after the initial sequence
+        // of ones.
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1));
+
+ // Rotate amount must be even.
+ unsigned RotAmt2 = TZ2 & ~1;
+
+ // If this fits, use it.
+ if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+  /// into a shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+    // If a single shifter_op handles the value, it is not a two-part value.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+    // Mask out the first chunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+  /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+    // 8-bit (or less) immediates are trivially handled with a shift of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+  /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+    // Mask off the shifted 8-bit chunk; if nothing remains, V qualifies.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+  /// isThumbImmShiftedVal, return the non-shifted value.
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
+ return V >> getThumbImmValShift(V);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+  // in bit 12, the immediate in bits 0-11, and the shift op in bits 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ //
+  // If the 4th bit (writeback) is set, then the base register is updated after
+ // the memory transfer.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) {
+ return (int)SubMode | ((int)WB << 3);
+ }
+
+ static inline bool getAM4WBFlag(unsigned Mode) {
+ return (Mode >> 3) & 1;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+  // The first operand is always a Reg.  The second operand encodes the
+  // operation in bit 8 and the immediate in bits 0-7.
+ //
+  // This can also be used for FP load/store multiple ops.  The second operand
+  // then encodes the writeback mode in bit 8 and the number of registers (or
+  // twice the number of registers for DPR ops) in bits 0-7.  In addition,
+  // bits 9-11 encode one of the following two sub-modes:
+ //
+ // IA - Increment after
+ // DB - Decrement before
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
+ /// FSTM instructions.
+ static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
+ unsigned char Offset) {
+ assert((SubMode == ia || SubMode == db) &&
+ "Illegal addressing mode 5 sub-mode!");
+ return ((int)SubMode << 9) | ((int)WB << 8) | Offset;
+ }
+ static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
+ return (AMSubMode)((AM5Opc >> 9) & 0x7);
+ }
+ static inline bool getAM5WBFlag(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1);
+ }
+
+} // end namespace ARM_AM
+} // end namespace llvm
+
+#endif
+
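
A worked round trip through the shifter-operand helpers above makes the encoding concrete (values checked by hand; the main() harness is only for illustration):

    #include "ARMAddressingModes.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // 0x0000FF00 is 0xFF rotated right by 24, so it is a valid so_imm.
      int Enc = ARM_AM::getSOImmVal(0xFF00);
      assert(Enc == 0xCFF);                        // imm8 = 0xFF, rot field = 12
      assert(ARM_AM::getSOImmValImm(Enc) == 0xFF); // low 8 bits
      assert(ARM_AM::getSOImmValRot(Enc) == 24);   // (Enc >> 8) * 2
      assert(ARM_AM::rotr32(0xFF, 24) == 0xFF00);  // decodes back to the value
      return 0;
    }
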
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 653d5007ab..a6ceb85cb3 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -15,54 +15,49 @@
#define DEBUG_TYPE "asm-printer"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineDebugInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Mangler.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Mangler.h"
#include "llvm/Support/MathExtras.h"
#include <cctype>
+#include <iostream>
+#include <set>
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
- static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
- switch (CC) {
- default: assert(0 && "Unknown condition code");
- case ARMCC::EQ: return "eq";
- case ARMCC::NE: return "ne";
- case ARMCC::CS: return "cs";
- case ARMCC::CC: return "cc";
- case ARMCC::MI: return "mi";
- case ARMCC::PL: return "pl";
- case ARMCC::VS: return "vs";
- case ARMCC::VC: return "vc";
- case ARMCC::HI: return "hi";
- case ARMCC::LS: return "ls";
- case ARMCC::GE: return "ge";
- case ARMCC::LT: return "lt";
- case ARMCC::GT: return "gt";
- case ARMCC::LE: return "le";
- case ARMCC::AL: return "al";
- }
- }
-
struct VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
ARMAsmPrinter(std::ostream &O, TargetMachine &TM, const TargetAsmInfo *T)
- : AsmPrinter(O, TM, T) {
+ : AsmPrinter(O, TM, T), DW(O, this, T), AFI(NULL), InCPMode(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
+ DwarfWriter DW;
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction
+ ARMFunctionInfo *AFI;
+
/// We name each basic block in a Function with a unique number, so
/// that we can consistently refer to them later. This is cleared
/// at the beginning of each call to runOnMachineFunction().
@@ -70,22 +65,79 @@ namespace {
typedef std::map<const Value *, unsigned> ValueMapTy;
ValueMapTy NumberForBB;
+ /// Keeps the set of GlobalValues that require non-lazy-pointers for
+ /// indirect access.
+ std::set<std::string> GVNonLazyPtrs;
+
+ /// Keeps the set of external function GlobalAddresses that the asm
+ /// printer should generate stubs for.
+ std::set<std::string> FnStubs;
+
+ /// True if asm printer is printing a series of CONSTPOOL_ENTRY.
+ bool InCPMode;
+
virtual const char *getPassName() const {
return "ARM Assembly Printer";
}
- void printAddrMode1(const MachineInstr *MI, int opNum);
- void printAddrMode2(const MachineInstr *MI, int opNum);
- void printAddrMode5(const MachineInstr *MI, int opNum);
- void printOperand(const MachineInstr *MI, int opNum);
- void printMemOperand(const MachineInstr *MI, int opNum,
- const char *Modifier = 0);
+ void printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSOImmOperand(const MachineInstr *MI, int opNum);
+ void printSORegOperand(const MachineInstr *MI, int opNum);
+ void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
+ void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
+ void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier = 0);
+ void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+ unsigned Scale);
+ void printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int OpNo);
+ void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
void printCCOperand(const MachineInstr *MI, int opNum);
+ void printPCLabel(const MachineInstr *MI, int opNum);
+ void printRegisterList(const MachineInstr *MI, int opNum);
+ void printCPInstOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier);
+ void printJTBlockOperand(const MachineInstr *MI, int opNum);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
bool printInstruction(const MachineInstr *MI); // autogenerated.
+ void printMachineInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
bool doInitialization(Module &M);
bool doFinalization(Module &M);
+
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ printDataDirective(MCPV->getType());
+
+ ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MCPV;
+ std::string Name = Mang->getValueName(ACPV->getGV());
+ if (ACPV->isNonLazyPointer()) {
+ GVNonLazyPtrs.insert(Name);
+ O << TAI->getPrivateGlobalPrefix() << Name << "$non_lazy_ptr";
+ } else
+ O << Name;
+ if (ACPV->getPCAdjustment() != 0)
+ O << "-(" << TAI->getPrivateGlobalPrefix() << "PC"
+ << utostr(ACPV->getLabelId())
+ << "+" << (unsigned)ACPV->getPCAdjustment() << ")";
+ O << "\n";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDebugInfo>();
+ }
};
} // end of anonymous namespace
@@ -97,55 +149,64 @@ namespace {
/// regardless of whether the function is in SSA form.
///
FunctionPass *llvm::createARMCodePrinterPass(std::ostream &o,
- TargetMachine &tm) {
+ ARMTargetMachine &tm) {
return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo());
}
-/// runOnMachineFunction - This uses the printMachineInstruction()
+/// runOnMachineFunction - This uses the printInstruction()
/// method to print assembly for each instruction.
///
bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- SetupMachineFunction(MF);
- O << "\n\n";
-
- // Print out constants referenced by the function
- EmitConstantPool(MF.getConstantPool());
-
- const std::vector<MachineConstantPoolEntry>
- &CP = MF.getConstantPool()->getConstants();
- for (unsigned i = 0, e = CP.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = CP[i];
- if (!CPE.isMachineConstantPoolEntry()){
- Constant *CV = CPE.Val.ConstVal;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- if (GV->hasExternalWeakLinkage()) {
- ExtWeakSymbols.insert(GV);
- }
- }
- }
+ AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (Subtarget->isDarwin()) {
+ DW.SetDebugInfo(&getAnalysis<MachineDebugInfo>());
}
- // Print out jump tables referenced by the function
- EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+ SetupMachineFunction(MF);
+ O << "\n";
+
+  // NOTE: we don't print out constant pools here; they are handled as
+  // instructions.
+ O << "\n";
// Print out labels for the function.
const Function *F = MF.getFunction();
- SwitchToTextSection(getSectionForFunction(*F).c_str(), F);
-
switch (F->getLinkage()) {
default: assert(0 && "Unknown linkage type!");
case Function::InternalLinkage:
+ SwitchToTextSection("\t.text", F);
break;
case Function::ExternalLinkage:
+ SwitchToTextSection("\t.text", F);
O << "\t.globl\t" << CurrentFnName << "\n";
break;
case Function::WeakLinkage:
case Function::LinkOnceLinkage:
- O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ if (Subtarget->isDarwin()) {
+ SwitchToTextSection(
+ ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F);
+ O << "\t.globl\t" << CurrentFnName << "\n";
+ O << "\t.weak_definition\t" << CurrentFnName << "\n";
+ } else {
+ O << TAI->getWeakRefDirective() << CurrentFnName << "\n";
+ }
break;
}
- EmitAlignment(2, F);
+
+ if (AFI->isThumbFunction()) {
+ EmitAlignment(1, F);
+ O << "\t.code\t16\n";
+ O << "\t.thumb_func\t" << CurrentFnName << "\n";
+ InCPMode = false;
+ } else
+ EmitAlignment(2, F);
+
O << CurrentFnName << ":\n";
+ if (Subtarget->isDarwin()) {
+ // Emit pre-function debug information.
+ DW.BeginFunction(&MF);
+ }
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
@@ -158,127 +219,340 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
// Print the assembly for the instruction.
- O << "\t";
- ++EmittedInsts;
- printInstruction(II);
- }
- }
-
- return false;
-}
-
-void ARMAsmPrinter::printAddrMode1(const MachineInstr *MI, int opNum) {
- const MachineOperand &Arg = MI->getOperand(opNum);
- const MachineOperand &Shift = MI->getOperand(opNum + 1);
- const MachineOperand &ShiftType = MI->getOperand(opNum + 2);
-
- if(Arg.isImmediate()) {
- assert(Shift.getImmedValue() == 0);
- printOperand(MI, opNum);
- } else {
- assert(Arg.isRegister());
- printOperand(MI, opNum);
- if(Shift.isRegister() || Shift.getImmedValue() != 0) {
- const char *s = NULL;
- switch(ShiftType.getImmedValue()) {
- case ARMShift::LSL:
- s = ", lsl ";
- break;
- case ARMShift::LSR:
- s = ", lsr ";
- break;
- case ARMShift::ASR:
- s = ", asr ";
- break;
- case ARMShift::ROR:
- s = ", ror ";
- break;
- case ARMShift::RRX:
- s = ", rrx ";
- break;
- }
- O << s;
- printOperand(MI, opNum + 1);
+ printMachineInstruction(II);
}
}
-}
-void ARMAsmPrinter::printAddrMode2(const MachineInstr *MI, int opNum) {
- const MachineOperand &Arg = MI->getOperand(opNum);
- const MachineOperand &Offset = MI->getOperand(opNum + 1);
- assert(Offset.isImmediate());
+ if (TAI->hasDotTypeDotSizeDirective())
+ O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n";
- if (Arg.isConstantPoolIndex()) {
- assert(Offset.getImmedValue() == 0);
- printOperand(MI, opNum);
- } else {
- assert(Arg.isRegister());
- O << '[';
- printOperand(MI, opNum);
- O << ", ";
- printOperand(MI, opNum + 1);
- O << ']';
+ if (Subtarget->isDarwin()) {
+ // Emit post-function debug information.
+ DW.EndFunction();
}
-}
-
-void ARMAsmPrinter::printAddrMode5(const MachineInstr *MI, int opNum) {
- const MachineOperand &Arg = MI->getOperand(opNum);
- const MachineOperand &Offset = MI->getOperand(opNum + 1);
- assert(Offset.isImmediate());
- if (Arg.isConstantPoolIndex()) {
- assert(Offset.getImmedValue() == 0);
- printOperand(MI, opNum);
- } else {
- assert(Arg.isRegister());
- O << '[';
- printOperand(MI, opNum);
- O << ", ";
- printOperand(MI, opNum + 1);
- O << ']';
- }
+ return false;
}
-void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
- const MachineOperand &MO = MI->getOperand (opNum);
- const MRegisterInfo &RI = *TM.getRegisterInfo();
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
if (MRegisterInfo::isPhysicalRegister(MO.getReg()))
- O << LowercaseString (RI.get(MO.getReg()).Name);
+ O << TM.getRegisterInfo()->get(MO.getReg()).Name;
else
assert(0 && "not implemented");
break;
- case MachineOperand::MO_Immediate:
- O << "#" << (int)MO.getImmedValue();
+ case MachineOperand::MO_Immediate: {
+ if (!Modifier || strcmp(Modifier, "no_hash") != 0)
+ O << "#";
+
+ O << (int)MO.getImmedValue();
break;
+ }
case MachineOperand::MO_MachineBasicBlock:
printBasicBlockLabel(MO.getMachineBasicBlock());
return;
case MachineOperand::MO_GlobalAddress: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
- O << Name;
- if (GV->hasExternalWeakLinkage()) {
+ bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ if (isExt && isCallOp && Subtarget->isDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ FnStubs.insert(Name);
+ } else
+ O << Name;
+
+ if (GV->hasExternalWeakLinkage())
ExtWeakSymbols.insert(GV);
- }
- }
break;
- case MachineOperand::MO_ExternalSymbol:
- O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ bool isCallOp = Modifier && !strcmp(Modifier, "call");
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ if (isCallOp && Subtarget->isDarwin() &&
+ TM.getRelocationModel() != Reloc::Static) {
+ O << TAI->getPrivateGlobalPrefix() << Name << "$stub";
+ FnStubs.insert(Name);
+ } else
+ O << Name;
break;
+ }
case MachineOperand::MO_ConstantPoolIndex:
O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
<< '_' << MO.getConstantPoolIndex();
break;
+ case MachineOperand::MO_JumpTableIndex:
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getJumpTableIndex();
+ break;
default:
O << "<unknown operand type>"; abort (); break;
}
}
-void ARMAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- const char *Modifier) {
- assert(0 && "not implemented");
+/// printSOImmOperand - SOImm is a 4-bit rotate amount in bits 8-11 with an
+/// 8-bit immediate in bits 0-7.
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isImmediate() && (MO.getImmedValue() < (1 << 12)) &&
+ "Not a valid so_imm value!");
+ unsigned Imm = ARM_AM::getSOImmValImm(MO.getImmedValue());
+ unsigned Rot = ARM_AM::getSOImmValRot(MO.getImmedValue());
+
+ // Print low-level immediate formation info, per
+ // A5.1.3: "Data-processing operands - Immediate".
+ if (Rot) {
+ O << "#" << Imm << ", " << Rot;
+ // Pretty printed version.
+ O << ' ' << TAI->getCommentString() << ' ' << (int)ARM_AM::rotr32(Imm, Rot);
+ } else {
+ O << "#" << Imm;
+ }
+}
+
+// so_reg is a three-operand unit corresponding to register forms of the A5.1
+// "Addressing Mode 1 - Data-processing operands" forms. This includes:
+// REG 0 0 - e.g. R5
+// REG REG 0,SH_OPC - e.g. R5, ROR R3
+// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ // Print the shift opc.
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImmedValue()))
+ << " ";
+
+ if (MO2.getReg()) {
+ assert(MRegisterInfo::isPhysicalRegister(MO2.getReg()));
+ O << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+ assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
+ } else {
+ O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ }
+}
+
+void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (!MO2.getReg()) {
+ if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0.
+ O << ", #"
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << ARM_AM::getAM2Offset(MO3.getImm());
+ O << "]";
+ return;
+ }
+
+ O << ", "
+ << (char)ARM_AM::getAM2Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).Name;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImmedValue()))
+ << " #" << ShImm;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.getReg()) {
+ if (ARM_AM::getAM2Offset(MO2.getImm())) // Don't print +0.
+ O << "#"
+ << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << ARM_AM::getAM2Offset(MO2.getImm());
+ return;
+ }
+
+ O << (char)ARM_AM::getAM2Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImmedValue()))
+ << " #" << ShImm;
+}
+
+void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (MO2.getReg()) {
+ O << ", "
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << TM.getRegisterInfo()->get(MO2.getReg()).Name
+ << "]";
+ return;
+ }
+
+ if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()))
+ O << ", #"
+ << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << ImmOffs;
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (MO1.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
+ O << "#"
+ << (char)ARM_AM::getAM3Op(MO2.getImm())
+ << ImmOffs;
+}
+
+void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm());
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ if (MO1.getReg() == ARM::SP) {
+ bool isLDM = (MI->getOpcode() == ARM::LDM ||
+ MI->getOpcode() == ARM::LDM_RET);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ } else {
+ printOperand(MI, Op);
+ if (ARM_AM::getAM4WBFlag(MO2.getImm()))
+ O << "!";
+ }
+}
+
+void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+
+ if (Modifier && strcmp(Modifier, "submode") == 0) {
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
+ if (MO1.getReg() == ARM::SP) {
+ bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
+ MI->getOpcode() == ARM::FLDMS);
+ O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
+ } else
+ O << ARM_AM::getAMSubModeStr(Mode);
+ return;
+ } else if (Modifier && strcmp(Modifier, "base") == 0) {
+ // Used for FSTM{D|S} and LSTM{D|S} operations.
+ O << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (ARM_AM::getAM5WBFlag(MO2.getImm()))
+ O << "!";
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+
+ if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
+ O << ", #"
+ << (char)ARM_AM::getAM5Op(MO2.getImm())
+ << ImmOffs*4;
+ }
+ O << "]";
+}
+
+void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
+ const char *Modifier) {
+ if (Modifier && strcmp(Modifier, "label") == 0) {
+ printPCLabel(MI, Op+1);
+ return;
+ }
+
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ assert(MRegisterInfo::isPhysicalRegister(MO1.getReg()));
+ O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).Name << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
+ unsigned Scale) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+
+ if (!MO1.isRegister()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op);
+ return;
+ }
+
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (unsigned ImmOffs = MO2.getImm()) {
+ O << ", #" << ImmOffs;
+ if (Scale > 1)
+ O << " * " << Scale;
+ }
+ O << "]";
+}
+
+void
+ARMAsmPrinter::printThumbAddrModeRI5_1Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 1);
+}
+void
+ARMAsmPrinter::printThumbAddrModeRI5_2Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 2);
+}
+void
+ARMAsmPrinter::printThumbAddrModeRI5_4Operand(const MachineInstr *MI, int Op) {
+ printThumbAddrModeRI5Operand(MI, Op, 4);
+}
+
+void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
+ const MachineOperand &MO1 = MI->getOperand(Op);
+ const MachineOperand &MO2 = MI->getOperand(Op+1);
+ O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).Name;
+ if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs << " * 4";
+ O << "]";
}
void ARMAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
@@ -286,9 +560,140 @@ void ARMAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
O << ARMCondCodeToString((ARMCC::CondCodes)CC);
}
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
+ int Id = (int)MI->getOperand(opNum).getImmedValue();
+ O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
+}
+
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+ O << "{";
+ for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+ printOperand(MI, i);
+ if (i != e-1) O << ", ";
+ }
+ O << "}";
+}
+
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+ const char *Modifier) {
+ assert(Modifier && "This operand only works with a modifier!");
+ // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
+ // data itself.
+ if (!strcmp(Modifier, "label")) {
+ unsigned ID = MI->getOperand(OpNo).getImm();
+ O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << ID << ":\n";
+ } else {
+ assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
+ unsigned CPI = MI->getOperand(OpNo).getConstantPoolIndex();
+
+ const MachineConstantPoolEntry &MCPE = // Chasing pointers is fun?
+ MI->getParent()->getParent()->getConstantPool()->getConstants()[CPI];
+
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ }
+}
+
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+ unsigned JTI = MO1.getJumpTableIndex();
+ O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImmedValue() << ":\n";
+
+ const char *JTEntryDirective = TAI->getJumpTableDirective();
+ if (!JTEntryDirective)
+ JTEntryDirective = TAI->getData32bitsDirective();
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_;
+ std::set<MachineBasicBlock*> JTSets;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (UseSet && JTSets.insert(MBB).second)
+ printSetLabel(JTI, MO2.getImmedValue(), MBB);
+
+ O << JTEntryDirective << ' ';
+ if (UseSet)
+ O << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
+ << '_' << JTI << '_' << MO2.getImmedValue()
+ << "_set_" << MBB->getNumber();
+ else if (TM.getRelocationModel() == Reloc::PIC_) {
+ printBasicBlockLabel(MBB, false, false);
+ // If the arch uses custom Jump Table directives, don't calc relative to JT
+ if (!TAI->getJumpTableDirective())
+ O << '-' << TAI->getPrivateGlobalPrefix() << "JTI"
+ << getFunctionNumber() << '_' << JTI << '_' << MO2.getImmedValue();
+ } else
+ printBasicBlockLabel(MBB, false, false);
+ O << '\n';
+ }
+}
+
+
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode){
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'Q':
+ if (TM.getTargetData()->isLittleEndian())
+ break;
+ // Fallthrough
+ case 'R':
+ if (TM.getTargetData()->isBigEndian())
+ break;
+ // Fallthrough
+ case 'H': // Write second word of DI / DF reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isRegister() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isRegister())
+ return true;
+ ++OpNo; // Return the high-part.
+ }
+ }
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+ ++EmittedInsts;
+
+ if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ if (!InCPMode && AFI->isThumbFunction()) {
+ EmitAlignment(2);
+ InCPMode = true;
+ }
+ } else {
+ if (InCPMode && AFI->isThumbFunction()) {
+ EmitAlignment(1);
+ InCPMode = false;
+ }
+ O << "\t";
+ }
+
+ // Call the autogenerated instruction printer routines.
+ printInstruction(MI);
+}
+
bool ARMAsmPrinter::doInitialization(Module &M) {
- AsmPrinter::doInitialization(M);
- return false; // success
+ if (Subtarget->isDarwin()) {
+ // Emit initial debug information.
+ DW.BeginModule(&M);
+ }
+
+ return AsmPrinter::doInitialization(M);
}
bool ARMAsmPrinter::doFinalization(Module &M) {
@@ -302,53 +707,154 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
if (EmitSpecialLLVMGlobal(I))
continue;
- O << "\n\n";
std::string name = Mang->getValueName(I);
Constant *C = I->getInitializer();
unsigned Size = TD->getTypeSize(C->getType());
- unsigned Align = Log2_32(TD->getTypeAlignment(C->getType()));
+ unsigned Align = TD->getPreferredAlignmentLog(I);
if (C->isNullValue() &&
!I->hasSection() &&
- (I->hasLinkOnceLinkage() || I->hasInternalLinkage() ||
- I->hasWeakLinkage())) {
- SwitchToDataSection(".data", I);
- if (I->hasInternalLinkage())
- O << "\t.local " << name << "\n";
-
- O << "\t.comm " << name << "," << Size
- << "," << (unsigned) (1 << Align);
- O << "\n";
+ (I->hasInternalLinkage() || I->hasWeakLinkage() ||
+ I->hasLinkOnceLinkage() ||
+ (Subtarget->isDarwin() && I->hasExternalLinkage()))) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (I->hasExternalLinkage()) {
+ O << "\t.globl\t" << name << "\n";
+ O << "\t.zerofill __DATA__, __common, " << name << ", "
+ << Size << ", " << Align;
+ } else {
+ SwitchToDataSection(TAI->getDataSection(), I);
+ if (TAI->getLCOMMDirective() != NULL) {
+ if (I->hasInternalLinkage()) {
+ O << TAI->getLCOMMDirective() << name << "," << Size;
+ if (Subtarget->isDarwin())
+ O << "," << Align;
+ } else
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ } else {
+ if (I->hasInternalLinkage())
+ O << "\t.local\t" << name << "\n";
+ O << TAI->getCOMMDirective() << name << "," << Size;
+ if (TAI->getCOMMDirectiveTakesAlignment())
+ O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+ }
+ }
+ O << "\t\t" << TAI->getCommentString() << " " << I->getName() << "\n";
+ continue;
} else {
switch (I->getLinkage()) {
default:
assert(0 && "Unknown linkage type!");
break;
+ case GlobalValue::LinkOnceLinkage:
+ case GlobalValue::WeakLinkage:
+ if (Subtarget->isDarwin()) {
+ O << "\t.globl " << name << "\n"
+ << "\t.weak_definition " << name << "\n";
+ SwitchToDataSection("\t.section __DATA,__const_coal,coalesced", I);
+ } else {
+ O << "\t.section\t.llvm.linkonce.d." << name << ",\"aw\",@progbits\n"
+ << "\t.weak " << name << "\n";
+ }
+ break;
case GlobalValue::ExternalLinkage:
O << "\t.globl " << name << "\n";
- break;
+ // FALL THROUGH
case GlobalValue::InternalLinkage:
- break;
- }
-
- if (I->hasSection() &&
- (I->getSection() == ".ctors" ||
- I->getSection() == ".dtors")) {
- std::string SectionName = ".section " + I->getSection();
+ if (I->isConstant()) {
+ const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
+ if (TAI->getCStringSection() && CVA && CVA->isCString()) {
+ SwitchToDataSection(TAI->getCStringSection(), I);
+ break;
+ }
+ }
- SectionName += ",\"aw\",%progbits";
+ if (I->hasSection() &&
+ (I->getSection() == ".ctors" ||
+ I->getSection() == ".dtors")) {
+ assert(!Subtarget->isDarwin());
+ std::string SectionName = ".section " + I->getSection();
+ SectionName += ",\"aw\",@progbits";
+ SwitchToDataSection(SectionName.c_str());
+ } else {
+ SwitchToDataSection(TAI->getDataSection(), I);
+ }
- SwitchToDataSection(SectionName.c_str());
- } else {
- SwitchToDataSection(TAI->getDataSection(), I);
+ break;
}
+ }
- EmitAlignment(Align, I);
+ EmitAlignment(Align, I);
+ if (TAI->hasDotTypeDotSizeDirective()) {
O << "\t.type " << name << ", %object\n";
O << "\t.size " << name << ", " << Size << "\n";
- O << name << ":\n";
- EmitGlobalConstant(C);
}
+ O << name << ":\n";
+
+ // If the initializer is a extern weak symbol, remember to emit the weak
+ // reference!
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ EmitGlobalConstant(C);
+ O << '\n';
+ }
+
+ if (Subtarget->isDarwin()) {
+ // Output stubs for dynamically-linked functions
+ unsigned j = 1;
+ for (std::set<std::string>::iterator i = FnStubs.begin(), e = FnStubs.end();
+ i != e; ++i, ++j) {
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs,"
+ "none,16", 0);
+ else
+ SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs,"
+ "none,12", 0);
+
+ EmitAlignment(2);
+ O << "\t.code\t32\n";
+
+ O << "L" << *i << "$stub:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\tldr ip, L" << *i << "$slp\n";
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ O << "L" << *i << "$scv:\n";
+ O << "\tadd ip, pc, ip\n";
+ }
+ O << "\tldr pc, [ip, #0]\n";
+ O << "L" << *i << "$slp:\n";
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ O << "\t.long\tL" << *i << "$lazy_ptr-(L" << *i << "$scv+8)\n";
+ else
+ O << "\t.long\tL" << *i << "$lazy_ptr\n";
+ SwitchToDataSection(".lazy_symbol_pointer", 0);
+ O << "L" << *i << "$lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\t.long\tdyld_stub_binding_helper\n";
+ }
+ O << "\n";
+
+ // Output non-lazy-pointers for external and common global variables.
+ if (GVNonLazyPtrs.begin() != GVNonLazyPtrs.end())
+ SwitchToDataSection(".non_lazy_symbol_pointer", 0);
+ for (std::set<std::string>::iterator i = GVNonLazyPtrs.begin(),
+ e = GVNonLazyPtrs.end(); i != e; ++i) {
+ O << "L" << *i << "$non_lazy_ptr:\n";
+ O << "\t.indirect_symbol " << *i << "\n";
+ O << "\t.long\t0\n";
+ }
+
+    // Emit final debug information.
+ DW.EndModule();
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ O << "\t.subsections_via_symbols\n";
}
AsmPrinter::doFinalization(M);
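
As a concrete example of the new operand printers, printAddrMode2Operand turns the three machine operands of a load/store (base register, offset register, packed AM2 opc) into syntax like "[r1, +r2, lsl #2]". A standalone sketch of that decode using only the ARM_AM helpers (the register names and the formatAM2() wrapper are illustrative, not part of the patch):

    #include "ARMAddressingModes.h"
    #include <iostream>
    #include <string>
    using namespace llvm;

    // Mimic printAddrMode2Operand for the "reg +/- reg shop imm" form.
    static std::string formatAM2(const char *Base, const char *Offs,
                                 unsigned Opc) {
      std::string S = std::string("[") + Base + ", ";
      S += (char)ARM_AM::getAM2Op(Opc);            // '+' or '-'
      S += Offs;
      if (unsigned ShImm = ARM_AM::getAM2Offset(Opc))
        S += std::string(", ") +
             ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(Opc)) +
             " #" + std::to_string(ShImm);
      return S + "]";
    }

    int main() {
      unsigned Opc = ARM_AM::getAM2Opc(ARM_AM::add, 2, ARM_AM::lsl);
      std::cout << formatAM2("r1", "r2", Opc) << "\n"; // "[r1, +r2, lsl #2]"
      return 0;
    }
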
diff --git a/lib/Target/ARM/ARMCommon.cpp b/lib/Target/ARM/ARMCommon.cpp
deleted file mode 100644
index fd3757303b..0000000000
--- a/lib/Target/ARM/ARMCommon.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- ARMCommon.cpp - Define support functions for ARM --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file was developed by the "Instituto Nokia de Tecnologia" and
-// is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-#include "ARMCommon.h"
-
-static inline unsigned rotateL(unsigned x, unsigned n){
- return ((x << n) | (x >> (32 - n)));
-}
-
-static inline unsigned rotateR(unsigned x, unsigned n){
- return ((x >> n) | (x << (32 - n)));
-}
-
-// finds the end position of largest sequence of zeros in binary representation
-// of 'immediate'.
-static int findLargestZeroSequence(unsigned immediate){
- int max_zero_pos = 0;
- int max_zero_length = 0;
- int zero_pos;
- int zero_length;
- int pos = 0;
- int end_pos;
-
- while ((immediate & 0x3) == 0) {
- immediate = rotateR(immediate, 2);
- pos+=2;
- }
- end_pos = pos+32;
-
- while (pos<end_pos){
- while (((immediate & 0x3) != 0)&&(pos<end_pos)) {
- immediate = rotateR(immediate, 2);
- pos+=2;
- }
- zero_pos = pos;
- while (((immediate & 0x3) == 0)&&(pos<end_pos)) {
- immediate = rotateR(immediate, 2);
- pos+=2;
- }
- zero_length = pos - zero_pos;
- if (zero_length > max_zero_length){
- max_zero_length = zero_length;
- max_zero_pos = zero_pos % 32;
- }
-
- }
-
- return (max_zero_pos + max_zero_length) % 32;
-}
-
-std::vector<unsigned> splitImmediate(unsigned immediate){
- std::vector<unsigned> immediatePieces;
-
- if (immediate == 0){
- immediatePieces.push_back(0);
- } else {
- int start_pos = findLargestZeroSequence(immediate);
- unsigned immediate_tmp = rotateR(immediate, start_pos);
- int pos = 0;
- while (pos < 32){
- while(((immediate_tmp&0x3) == 0)&&(pos<32)){
- immediate_tmp = rotateR(immediate_tmp,2);
- pos+=2;
- }
- if (pos < 32){
- immediatePieces.push_back(rotateL(immediate_tmp&0xFF,
- (start_pos + pos) % 32 ));
- immediate_tmp = rotateR(immediate_tmp,8);
- pos+=8;
- }
- }
- }
- return immediatePieces;
-}
diff --git a/lib/Target/ARM/ARMCommon.h b/lib/Target/ARM/ARMCommon.h
deleted file mode 100644
index c35150b7a4..0000000000
--- a/lib/Target/ARM/ARMCommon.h
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- ARMCommon.h - Define support functions for ARM ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file was developed by the "Instituto Nokia de Tecnologia" and
-// is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_COMMON_H
-#define ARM_COMMON_H
-
-#include <vector>
-
-std::vector<unsigned> splitImmediate(unsigned immediate);
-
-#endif
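
With ARMCommon.h and ARMCommon.cpp deleted, the job of the old splitImmediate() (breaking a constant into pieces that each fit a rotated 8-bit immediate) appears to pass to the isSOImmTwoPartVal() family in ARMAddressingModes.h. A sketch of a two-part split under that assumption:

    #include "ARMAddressingModes.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // 0x000F000F cannot be materialized by a single MOV with an so_imm...
      assert(ARM_AM::getSOImmVal(0xF000F) == -1);
      // ...but it is the OR of two so_imm values, so MOV + ORR suffices.
      assert(ARM_AM::isSOImmTwoPartVal(0xF000F));
      unsigned First  = ARM_AM::getSOImmTwoPartFirst(0xF000F);  // 0x0000000F
      unsigned Second = ARM_AM::getSOImmTwoPartSecond(0xF000F); // 0x000F0000
      assert((First | Second) == 0xF000F);
      assert(ARM_AM::getSOImmVal(First) != -1);
      assert(ARM_AM::getSOImmVal(Second) != -1);
      return 0;
    }
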
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 0000000000..183bde8824
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,490 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered through-out the function. This is required due to the
+// limited pc-relative displacements that ARM has.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include <iostream>
+using namespace llvm;
+
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+
+namespace {
+ /// ARMConstantIslands - Due to limited pc-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+  /// inside a function. To do this, it completely ignores the normal LLVM
+  /// constant pool; instead, it places constants wherever it feels like with
+  /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ /// NextUID - Assign unique ID's to CPE's.
+ unsigned NextUID;
+
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number.
+ std::vector<unsigned> BBSizes;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ unsigned MaxDisp;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {}
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ const TargetInstrInfo *TII;
+ const TargetAsmInfo *TAI;
+ public:
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs);
+ void InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs);
+ void SplitBlockBeforeInstr(MachineInstr *MI);
+ bool HandleConstantPoolUser(MachineFunction &Fn, CPUser &U);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+
+ unsigned GetInstSize(MachineInstr *MI) const;
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ };
+}
+
+/// createARMConstantIslandPass - returns an instance of the constant island
+/// placement pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
+ // If there are no constants, there is nothing to do.
+ MachineConstantPool &MCP = *Fn.getConstantPool();
+ if (MCP.isEmpty()) return false;
+
+ TII = Fn.getTarget().getInstrInfo();
+ TAI = Fn.getTarget().getTargetAsmInfo();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ Fn.RenumberBlocks();
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ DoInitialPlacement(Fn, CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ NextUID = CPEMIs.size();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(Fn, CPEMIs);
+ CPEMIs.clear();
+
+ // Iteratively place constant pool entries until there is no change.
+ bool MadeChange;
+ do {
+ MadeChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ MadeChange |= HandleConstantPoolUser(Fn, CPUsers[i]);
+ } while (MadeChange);
+
+ BBSizes.clear();
+ WaterList.clear();
+ CPUsers.clear();
+
+ return true;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn,
+ std::vector<MachineInstr*> &CPEMIs){
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = new MachineBasicBlock();
+ Fn.getBasicBlockList().push_back(BB);
+
+  // Add all of the constants from the constant pool to the end block, using
+  // an identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ Fn.getConstantPool()->getConstants();
+
+ const TargetData &TD = *Fn.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+ DEBUG(std::cerr << "Moved CPI#" << i << " to end of function as #"
+ << i << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall
+/// through into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ return false;
+
+ MachineBasicBlock *NextBB = next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+    // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ // Add instruction size to MBBSize.
+ MBBSize += GetInstSize(I);
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isConstantPoolIndex()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+ unsigned MaxOffs = 0;
+
+ // Basic size info comes from the TSFlags field.
+ unsigned TSFlags = I->getInstrDescriptor()->TSFlags;
+ switch (TSFlags & ARMII::AddrModeMask) {
+ default:
+ // Constant pool entries can reach anything.
+ if (I->getOpcode() == ARM::CONSTPOOL_ENTRY)
+ continue;
+ assert(0 && "Unknown addressing mode for CP reference!");
+ case ARMII::AddrMode1: // AM1: 8 bits << 2
+ MaxOffs = 1 << (8+2); // Taking the address of a CP entry.
+ break;
+ case ARMII::AddrMode2:
+ MaxOffs = 1 << 12; // +-offset_12
+ break;
+ case ARMII::AddrMode3:
+ MaxOffs = 1 << 8; // +-offset_8
+ break;
+ // addrmode4 has no immediate offset.
+ case ARMII::AddrMode5:
+ MaxOffs = 1 << (8+2); // +-(offset_8*4)
+ break;
+ case ARMII::AddrModeT1:
+ MaxOffs = 1 << 5;
+ break;
+ case ARMII::AddrModeT2:
+ MaxOffs = 1 << (5+1);
+ break;
+ case ARMII::AddrModeT4:
+ MaxOffs = 1 << (5+2);
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ MachineInstr *CPEMI =CPEMIs[I->getOperand(op).getConstantPoolIndex()];
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs));
+
+          // Instructions can only use one CP entry; don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+ BBSizes.push_back(MBBSize);
+ }
+}
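
The MaxOffs values above are just the reach, in bytes from the PC, of each addressing mode's immediate offset field. The same numbers as a tiny table (illustration only, not part of this commit):

    #include <cstdio>
    int main() {
      std::printf("AddrMode2  (+-offset_12):  %u\n", 1u << 12);      // 4096
      std::printf("AddrMode3  (+-offset_8):   %u\n", 1u << 8);       // 256
      std::printf("AddrMode5  (+-offset_8*4): %u\n", 1u << (8 + 2)); // 1024
      std::printf("AddrModeT4 (offset_5*4):   %u\n", 1u << (5 + 2)); // 128
    }
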
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMConstantIslands::GetInstSize(MachineInstr *MI) const {
+ // Basic size info comes from the TSFlags field.
+ unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default:
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+ assert(0 && "Unknown or unset size field for instr!");
+ break;
+ case ARMII::Size8Bytes: return 8; // Arm instruction x 2.
+ case ARMII::Size4Bytes: return 4; // Arm instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb instruction.
+ case ARMII::SizeSpecial: {
+ switch (MI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries.
+ unsigned JTI = MI->getOperand(MI->getNumOperands()-2).getJumpTableIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ return getNumJTEntries(JT, JTI) * 4 + 4;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+}
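
For the BR_JT* cases the arithmetic is one 4-byte branch plus one 4-byte table word per destination; e.g. a jump table with 6 targets contributes 4 + 6*4 = 28 bytes. A one-line sketch (illustration only; the helper name is made up):

    // Size of an inlined jump table: the branch plus one word per entry.
    constexpr unsigned jumpTableSize(unsigned NumEntries) {
      return 4 + NumEntries * 4;
    }
    static_assert(jumpTableSize(6) == 28, "4 + 6*4 bytes");
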
+
+/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = 0;
+
+ // Sum block sizes before MBB.
+ for (unsigned BB = 0, e = MBB->getNumber(); BB != e; ++BB)
+ Offset += BBSizes[BB];
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += GetInstSize(I);
+ }
+}
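
Worked example of the two-part sum (illustration only, made-up sizes): with BBSizes = {12, 8, 20} and MI sitting 8 bytes into block #2, the offset is 12 + 8 + 8 = 28.

    #include <cstdio>
    int main() {
      unsigned BBSizes[] = {12, 8, 20}; // bytes of code per block
      unsigned OffInBlock = 8;          // MI is 8 bytes into block #2
      // Sum the blocks before block #2, then add the in-block offset.
      std::printf("%u\n", BBSizes[0] + BBSizes[1] + OffInBlock); // 28
    }
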
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+  // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+  // Next, update WaterList. Specifically, we need to add NewBB as having
+ // available water after it.
+ std::vector<MachineBasicBlock*>::iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
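
The lower_bound insert is what keeps WaterList sorted by block number as new blocks appear. The same idiom on plain ints (illustration only, not part of this commit):

    #include <algorithm>
    #include <vector>
    int main() {
      std::vector<int> Water = {0, 3, 7}; // sorted block numbers
      int NewBB = 5;
      Water.insert(std::lower_bound(Water.begin(), Water.end(), NewBB),
                   NewBB);
      // Water is now {0, 3, 5, 7}, still sorted.
    }
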
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change.
+void ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB = new MachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ OrigBB->getParent()->getBasicBlockList().insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ BuildMI(OrigBB, TII->get(ARM::B)).addMBB(NewBB);
+ NumSplit++;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewBB);
+
+  // Figure out how large NewBB is.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += GetInstSize(I);
+
+ // Set the size of NewBB in BBSizes.
+ BBSizes[NewBB->getNumber()] = NewBBSize;
+
+  // We removed instructions from OrigBB; subtract that from its size.
+ // Add 4 to the block to count the unconditional branch we added to it.
+ BBSizes[OrigBB->getNumber()] -= NewBBSize-4;
+}
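
The closing bookkeeping is: NewBB owns the bytes that were spliced over, and OrigBB loses them but gains a 4-byte unconditional branch, hence the -= NewBBSize-4. Worked numbers (illustration only, made-up sizes):

    #include <cstdio>
    int main() {
      unsigned OrigSize  = 40; // block size before the split
      unsigned MovedSize = 24; // bytes spliced into NewBB
      unsigned NewOrig   = OrigSize - (MovedSize - 4); // +4 for the branch
      std::printf("OrigBB=%u NewBB=%u\n", NewOrig, MovedSize); // 20 and 24
    }
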
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it to
+/// some place in-range.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, CPUser &U){
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ unsigned UserOffset = GetOffsetOf(UserMI);
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+
+ DEBUG(std::cerr << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << U.MaxDisp
+ << " at offset " << int(UserOffset-CPEOffset) << "\t"
+ << *UserMI);
+
+ // Check to see if the CPE is already in-range.
+ if (UserOffset < CPEOffset) {
+ // User before the CPE.
+ if (CPEOffset-UserOffset <= U.MaxDisp)
+ return false;
+ } else {
+ if (UserOffset-CPEOffset <= U.MaxDisp)
+ return false;
+ }
+
+
+ // Solution guaranteed to work: split the user's MBB right before the user and
+  // insert a clone of the CPE into the newly created water.
+
+ // If the user isn't at the start of its MBB, or if there is a fall-through
+ // into the user's MBB, split the MBB before the User.
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ if (&UserMBB->front() != UserMI ||
+ UserMBB == &Fn.front() || // entry MBB of function.
+ BBHasFallthrough(prior(MachineFunction::iterator(UserMBB)))) {
+ // TODO: Search for the best place to split the code. In practice, using
+ // loop nesting information to insert these guys outside of loops would be
+ // sufficient.
+ SplitBlockBeforeInstr(UserMI);
+
+ // UserMI's BB may have changed.
+ UserMBB = UserMI->getParent();
+ }
+
+ // Okay, we know we can put an island before UserMBB now, do it!
+ MachineBasicBlock *NewIsland = new MachineBasicBlock();
+ Fn.getBasicBlockList().insert(UserMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ unsigned ID = NextUID++;
+ unsigned CPI = CPEMI->getOperand(1).getConstantPoolIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+
+ // Build a new CPE for this user.
+ U.CPEMI = BuildMI(NewIsland, TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isConstantPoolIndex()) {
+ UserMI->getOperand(i).setConstantPoolIndex(ID);
+ break;
+ }
+
+ DEBUG(std::cerr << " Moved CPE to #" << ID << " CPI=" << CPI << "\t"
+ << *UserMI);
+
+
+ return true;
+}
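
The range test above is an absolute-distance check kept in unsigned arithmetic. A compact equivalent (illustration only; cpeInRange is a made-up name):

    // True if the CPE is reachable from the user in either direction.
    bool cpeInRange(unsigned UserOff, unsigned CPEOff, unsigned MaxDisp) {
      unsigned Delta = UserOff < CPEOff ? CPEOff - UserOff
                                        : UserOff - CPEOff;
      return Delta <= MaxDisp;
    }
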
+
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
new file mode 100644
index 0000000000..97cca07d33
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -0,0 +1,55 @@
+//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/GlobalValue.h"
+using namespace llvm;
+
+ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
+ bool isNonLazy, unsigned char PCAdj)
+ : MachineConstantPoolValue((const Type*)gv->getType()),
+ GV(gv), LabelId(id), isNonLazyPtr(isNonLazy), PCAdjust(PCAdj) {}
+
+int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = (1 << Alignment)-1;
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].Offset & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (CPV->GV == GV && CPV->LabelId == LabelId &&
+ CPV->isNonLazyPtr == isNonLazyPtr)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddInteger(LabelId);
+ ID.AddInteger((unsigned)isNonLazyPtr);
+ ID.AddInteger(PCAdjust);
+}
+
+void ARMConstantPoolValue::print(std::ostream &O) const {
+ O << GV->getName();
+ if (isNonLazyPtr) O << "$non_lazy_ptr";
+ if (PCAdjust != 0) O << "-(LPIC" << LabelId << "+"
+ << (unsigned)PCAdjust << ")";
+}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
new file mode 100644
index 0000000000..a9143d4ddc
--- /dev/null
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -0,0 +1,50 @@
+//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific constantpool value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+
+namespace llvm {
+
+/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
+/// represent PC relative displacement between the address of the load
+/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)).
+class ARMConstantPoolValue : public MachineConstantPoolValue {
+ GlobalValue *GV; // GlobalValue being loaded.
+ unsigned LabelId; // Label id of the load.
+ bool isNonLazyPtr; // True if loading a Mac OS X non_lazy_ptr stub.
+ unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative.
+ // 8 for ARM, 4 for Thumb.
+
+public:
+ ARMConstantPoolValue(GlobalValue *gv, unsigned id, bool isNonLazy = false,
+ unsigned char PCAdj = 0);
+
+ GlobalValue *getGV() const { return GV; }
+ unsigned getLabelId() const { return LabelId; }
+ bool isNonLazyPointer() const { return isNonLazyPtr; }
+ unsigned char getPCAdjustment() const { return PCAdjust; }
+
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment);
+
+ virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+
+ virtual void print(std::ostream &O) const;
+};
+
+}
+
+#endif
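
The PCAdjust of 8 (ARM) or 4 (Thumb) is the pipeline bias: an ARM instruction reading the PC sees its own address plus 8, so storing &GV-(LPIC+8) in the pool lets an add with pc at label LPIC reconstruct &GV exactly. Worked arithmetic (illustration only, made-up addresses):

    #include <cstdio>
    int main() {
      unsigned GV    = 0x9000;           // address being materialized
      unsigned LPIC  = 0x8000;           // address of the pic label
      unsigned Entry = GV - (LPIC + 8);  // what the pool entry stores
      unsigned PC    = LPIC + 8;         // ARM: PC reads 8 bytes ahead
      std::printf("%#x\n", Entry + PC);  // 0x9000, &GV recovered
    }
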
diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h
index 5bd7f67667..c56640a354 100644
--- a/lib/Target/ARM/ARMFrameInfo.h
+++ b/lib/Target/ARM/ARMFrameInfo.h
@@ -17,17 +17,15 @@
#include "ARM.h"
#include "llvm/Target/TargetFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "ARMSubtarget.h"
namespace llvm {
-class ARMFrameInfo: public TargetFrameInfo {
-
+class ARMFrameInfo : public TargetFrameInfo {
public:
- ARMFrameInfo()
- : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+ ARMFrameInfo(const ARMSubtarget &ST)
+ : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) {
}
-
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2a5f3e360c..f5f4599b5c 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -12,14 +12,14 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"
-#include "ARMCommon.h"
+#include "ARMAddressingModes.h"
#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/Constants.h"
#include "llvm/Intrinsics.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,1081 +28,545 @@
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include <vector>
+#include <iostream>
using namespace llvm;
+//===--------------------------------------------------------------------===//
+/// ARMDAGToDAGISel - ARM specific code to select ARM machine
+/// instructions for SelectionDAG operations.
+///
namespace {
- class ARMTargetLowering : public TargetLowering {
- int VarArgsFrameIndex; // FrameIndex for start of varargs area.
- public:
- ARMTargetLowering(TargetMachine &TM);
- virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
- virtual const char *getTargetNodeName(unsigned Opcode) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const;
- };
-
-}
-
-ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM) {
- addRegisterClass(MVT::i32, ARM::IntRegsRegisterClass);
- addRegisterClass(MVT::f32, ARM::FPRegsRegisterClass);
- addRegisterClass(MVT::f64, ARM::DFPRegsRegisterClass);
-
- setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
-
- setOperationAction(ISD::RET, MVT::Other, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
-
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
-
- setOperationAction(ISD::SETCC, MVT::i32, Expand);
- setOperationAction(ISD::SETCC, MVT::f32, Expand);
- setOperationAction(ISD::SETCC, MVT::f64, Expand);
-
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
-
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f64, Custom);
-
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-
- setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
- setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
-
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
- setStackPointerRegisterToSaveRestore(ARM::R13);
-
- setSchedulingPreference(SchedulingForRegPressure);
- computeRegisterProperties();
-}
-
-namespace llvm {
- namespace ARMISD {
- enum NodeType {
- // Start the numbering where the builting ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
- /// CALL - A direct function call.
- CALL,
-
- /// Return with a flag operand.
- RET_FLAG,
-
- CMP,
-
- SELECT,
-
- BR,
-
- FSITOS,
- FTOSIS,
-
- FSITOD,
- FTOSID,
-
- FUITOS,
- FTOUIS,
-
- FUITOD,
- FTOUID,
-
- FMRRD,
+class ARMDAGToDAGISel : public SelectionDAGISel {
+ ARMTargetLowering Lowering;
- FMDRR,
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
- FMSTAT
- };
+public:
+ ARMDAGToDAGISel(ARMTargetMachine &TM)
+ : SelectionDAGISel(Lowering), Lowering(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
-}
-/// DAGFPCCToARMCC - Convert a DAG fp condition code to an ARM CC
-// Unordered = !N & !Z & C & V = V
-// Ordered = N | Z | !C | !V = N | Z | !V
-static std::vector<unsigned> DAGFPCCToARMCC(ISD::CondCode CC) {
- switch (CC) {
- default:
- assert(0 && "Unknown fp condition code!");
-// SETOEQ = (N | Z | !V) & Z = Z = EQ
- case ISD::SETEQ:
- case ISD::SETOEQ: return make_vector<unsigned>(ARMCC::EQ, 0);
-// SETOGT = (N | Z | !V) & !N & !Z = !V &!N &!Z = (N = V) & !Z = GT
- case ISD::SETGT:
- case ISD::SETOGT: return make_vector<unsigned>(ARMCC::GT, 0);
-// SETOGE = (N | Z | !V) & !N = (Z | !V) & !N = !V & !N = GE
- case ISD::SETGE:
- case ISD::SETOGE: return make_vector<unsigned>(ARMCC::GE, 0);
-// SETOLT = (N | Z | !V) & N = N = MI
- case ISD::SETLT:
- case ISD::SETOLT: return make_vector<unsigned>(ARMCC::MI, 0);
-// SETOLE = (N | Z | !V) & (N | Z) = N | Z = !C | Z = LS
- case ISD::SETLE:
- case ISD::SETOLE: return make_vector<unsigned>(ARMCC::LS, 0);
-// SETONE = OGT | OLT
- case ISD::SETONE: return make_vector<unsigned>(ARMCC::GT, ARMCC::MI, 0);
-// SETO = N | Z | !V = Z | !V = !V = VC
- case ISD::SETO: return make_vector<unsigned>(ARMCC::VC, 0);
-// SETUO = V = VS
- case ISD::SETUO: return make_vector<unsigned>(ARMCC::VS, 0);
-// SETUEQ = V | Z (need two instructions) = EQ/VS
- case ISD::SETUEQ: return make_vector<unsigned>(ARMCC::EQ, ARMCC::VS, 0);
-// SETUGT = V | (!Z & !N) = !Z & !N = !Z & C = HI
- case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0);
-// SETUGE = V | !N = !N = PL
- case ISD::SETUGE: return make_vector<unsigned>(ARMCC::PL, 0);
-// SETULT = V | N = LT
- case ISD::SETULT: return make_vector<unsigned>(ARMCC::LT, 0);
-// SETULE = V | Z | N = LE
- case ISD::SETULE: return make_vector<unsigned>(ARMCC::LE, 0);
-// SETUNE = V | !Z = !Z = NE
- case ISD::SETNE:
- case ISD::SETUNE: return make_vector<unsigned>(ARMCC::NE, 0);
- }
-}
+ virtual const char *getPassName() const {
+ return "ARM Instruction Selection";
+ }
+
+ SDNode *Select(SDOperand Op);
+ virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
+ bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode3(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc);
+ bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
-/// DAGIntCCToARMCC - Convert a DAG integer condition code to an ARM CC
-static std::vector<unsigned> DAGIntCCToARMCC(ISD::CondCode CC) {
- switch (CC) {
- default:
- assert(0 && "Unknown integer condition code!");
- case ISD::SETEQ: return make_vector<unsigned>(ARMCC::EQ, 0);
- case ISD::SETNE: return make_vector<unsigned>(ARMCC::NE, 0);
- case ISD::SETLT: return make_vector<unsigned>(ARMCC::LT, 0);
- case ISD::SETLE: return make_vector<unsigned>(ARMCC::LE, 0);
- case ISD::SETGT: return make_vector<unsigned>(ARMCC::GT, 0);
- case ISD::SETGE: return make_vector<unsigned>(ARMCC::GE, 0);
- case ISD::SETULT: return make_vector<unsigned>(ARMCC::CC, 0);
- case ISD::SETULE: return make_vector<unsigned>(ARMCC::LS, 0);
- case ISD::SETUGT: return make_vector<unsigned>(ARMCC::HI, 0);
- case ISD::SETUGE: return make_vector<unsigned>(ARMCC::CS, 0);
- }
+ bool SelectAddrModePC(SDOperand Op, SDOperand N, SDOperand &Offset,
+ SDOperand &Label);
+
+ bool SelectThumbAddrModeRR(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+ bool SelectThumbAddrModeSP(SDOperand Op, SDOperand N, SDOperand &Base,
+ SDOperand &Offset);
+
+ bool SelectShifterOperandReg(SDOperand Op, SDOperand N, SDOperand &A,
+ SDOperand &B, SDOperand &C);
+
+ // Include the pieces autogenerated from the target description.
+#include "ARMGenDAGISel.inc"
+};
}
-std::vector<unsigned> ARMTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- MVT::ValueType VT) const {
- if (Constraint.size() == 1) {
- // FIXME: handling only r regs
- switch (Constraint[0]) {
- default: break; // Unknown constraint letter
-
- case 'r': // GENERAL_REGS
- case 'R': // LEGACY_REGS
- if (VT == MVT::i32)
- return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11,
- ARM::R12, ARM::R13, ARM::R14, 0);
- break;
+void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
+ DEBUG(BB->dump());
- }
- }
+ DAG.setRoot(SelectRoot(DAG.getRoot()));
+ DAG.RemoveDeadNodes();
- return std::vector<unsigned>();
+ ScheduleAndEmitDAG(DAG);
}
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case ARMISD::CALL: return "ARMISD::CALL";
- case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
- case ARMISD::SELECT: return "ARMISD::SELECT";
- case ARMISD::CMP: return "ARMISD::CMP";
- case ARMISD::BR: return "ARMISD::BR";
- case ARMISD::FSITOS: return "ARMISD::FSITOS";
- case ARMISD::FTOSIS: return "ARMISD::FTOSIS";
- case ARMISD::FSITOD: return "ARMISD::FSITOD";
- case ARMISD::FTOSID: return "ARMISD::FTOSID";
- case ARMISD::FUITOS: return "ARMISD::FUITOS";
- case ARMISD::FTOUIS: return "ARMISD::FTOUIS";
- case ARMISD::FUITOD: return "ARMISD::FUITOD";
- case ARMISD::FTOUID: return "ARMISD::FTOUID";
- case ARMISD::FMRRD: return "ARMISD::FMRRD";
- case ARMISD::FMDRR: return "ARMISD::FMDRR";
- case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset,
+ SDOperand &Opc) {
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
+ }
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
}
-}
-
-class ArgumentLayout {
- std::vector<bool> is_reg;
- std::vector<unsigned> pos;
- std::vector<MVT::ValueType> types;
-public:
- ArgumentLayout(const std::vector<MVT::ValueType> &Types) {
- types = Types;
-
- unsigned RegNum = 0;
- unsigned StackOffset = 0;
- for(std::vector<MVT::ValueType>::const_iterator I = Types.begin();
- I != Types.end();
- ++I) {
- MVT::ValueType VT = *I;
- assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
- unsigned size = MVT::getSizeInBits(VT)/32;
-
- RegNum = ((RegNum + size - 1) / size) * size;
- if (RegNum < 4) {
- pos.push_back(RegNum);
- is_reg.push_back(true);
- RegNum += size;
- } else {
- unsigned bytes = size * 32/8;
- StackOffset = ((StackOffset + bytes - 1) / bytes) * bytes;
- pos.push_back(StackOffset);
- is_reg.push_back(false);
- StackOffset += bytes;
+
+ // Match simple R +/- imm12 operands.
+ if (N.getOpcode() == ISD::ADD)
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits.
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ } else if (RHSC < 0 && RHSC > -0x1000) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::sub, -RHSC,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
}
}
- }
- unsigned getRegisterNum(unsigned argNum) {
- assert(isRegister(argNum));
- return pos[argNum];
- }
- unsigned getOffset(unsigned argNum) {
- assert(isOffset(argNum));
- return pos[argNum];
- }
- unsigned isRegister(unsigned argNum) {
- assert(argNum < is_reg.size());
- return is_reg[argNum];
- }
- unsigned isOffset(unsigned argNum) {
- return !isRegister(argNum);
- }
- MVT::ValueType getType(unsigned argNum) {
- assert(argNum < types.size());
- return types[argNum];
- }
- unsigned getStackSize(void) {
- int last = is_reg.size() - 1;
- if (last < 0)
- return 0;
- if (isRegister(last))
- return 0;
- return getOffset(last) + MVT::getSizeInBits(getType(last))/8;
- }
- int lastRegArg(void) {
- int size = is_reg.size();
- int last = 0;
- while(last < size && isRegister(last))
- last++;
- last--;
- return last;
- }
- int lastRegNum(void) {
- int l = lastRegArg();
- if (l < 0)
- return -1;
- unsigned r = getRegisterNum(l);
- MVT::ValueType t = getType(l);
- assert(t == MVT::i32 || t == MVT::f32 || t == MVT::f64);
- if (t == MVT::f64)
- return r + 1;
- return r;
- }
-};
-
-// This transforms a ISD::CALL node into a
-// callseq_star <- ARMISD:CALL <- callseq_end
-// chain
-static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG) {
- SDOperand Chain = Op.getOperand(0);
- unsigned CallConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- assert((CallConv == CallingConv::C ||
- CallConv == CallingConv::Fast)
- && "unknown calling convention");
- SDOperand Callee = Op.getOperand(4);
- unsigned NumOps = (Op.getNumOperands() - 5) / 2;
- SDOperand StackPtr = DAG.getRegister(ARM::R13, MVT::i32);
- static const unsigned regs[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3
- };
-
- std::vector<MVT::ValueType> Types;
- for (unsigned i = 0; i < NumOps; ++i) {
- MVT::ValueType VT = Op.getOperand(5+2*i).getValueType();
- Types.push_back(VT);
- }
- ArgumentLayout Layout(Types);
-
- unsigned NumBytes = Layout.getStackSize();
-
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getConstant(NumBytes, MVT::i32));
-
- //Build a sequence of stores
- std::vector<SDOperand> MemOpChains;
- for (unsigned i = Layout.lastRegArg() + 1; i < NumOps; ++i) {
- SDOperand Arg = Op.getOperand(5+2*i);
- unsigned ArgOffset = Layout.getOffset(i);
- SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
- PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
- }
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- // Likewise ExternalSymbol -> TargetExternalSymbol.
- assert(Callee.getValueType() == MVT::i32);
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
-
- // If this is a direct call, pass the chain and the callee.
- assert (Callee.Val);
- std::vector<SDOperand> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDOperand InFlag;
- for (int i = 0, e = Layout.lastRegArg(); i <= e; ++i) {
- SDOperand Arg = Op.getOperand(5+2*i);
- unsigned RegNum = Layout.getRegisterNum(i);
- unsigned Reg1 = regs[RegNum];
- MVT::ValueType VT = Layout.getType(i);
- assert(VT == Arg.getValueType());
- assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-
- // Add argument register to the end of the list so that it is known live
- // into the call.
- Ops.push_back(DAG.getRegister(Reg1, MVT::i32));
- if (VT == MVT::f64) {
- unsigned Reg2 = regs[RegNum + 1];
- SDOperand SDReg1 = DAG.getRegister(Reg1, MVT::i32);
- SDOperand SDReg2 = DAG.getRegister(Reg2, MVT::i32);
-
- Ops.push_back(DAG.getRegister(Reg2, MVT::i32));
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand Ops[] = {Chain, SDReg1, SDReg2, Arg, InFlag};
- Chain = DAG.getNode(ARMISD::FMRRD, VTs, Ops, InFlag.Val ? 5 : 4);
+
+ // Otherwise this is R +/- [possibly shifted] R
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant; if not, we can't
+    // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(1).getOperand(0);
} else {
- if (VT == MVT::f32)
- Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg);
- Chain = DAG.getCopyToReg(Chain, Reg1, Arg, InFlag);
+ ShOpcVal = ARM_AM::no_shift;
}
- InFlag = Chain.getValue(1);
}
-
- std::vector<MVT::ValueType> NodeTys;
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
-
- unsigned CallOpc = ARMISD::CALL;
- if (InFlag.Val)
- Ops.push_back(InFlag);
- Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- std::vector<SDOperand> ResultVals;
- NodeTys.clear();
-
- // If the call has results, copy the values out of the ret val registers.
- MVT::ValueType VT = Op.Val->getValueType(0);
- if (VT != MVT::Other) {
- assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
-
- SDOperand Value1 = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
- Chain = Value1.getValue(1);
- InFlag = Value1.getValue(2);
- NodeTys.push_back(VT);
- if (VT == MVT::i32) {
- ResultVals.push_back(Value1);
- if (Op.Val->getValueType(1) == MVT::i32) {
- SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag);
- Chain = Value2.getValue(1);
- ResultVals.push_back(Value2);
- NodeTys.push_back(VT);
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+      // Check to see if the RHS of the shift is a constant; if not, we can't
+      // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
}
}
- if (VT == MVT::f32) {
- SDOperand Value = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Value1);
- ResultVals.push_back(Value);
- }
- if (VT == MVT::f64) {
- SDOperand Value2 = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, InFlag);
- Chain = Value2.getValue(1);
- SDOperand Value = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2);
- ResultVals.push_back(Value);
- }
}
-
- Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
- DAG.getConstant(NumBytes, MVT::i32));
- NodeTys.push_back(MVT::Other);
-
- if (ResultVals.empty())
- return Chain;
-
- ResultVals.push_back(Chain);
- SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
- ResultVals.size());
- return Res.getValue(Op.ResNo);
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
}
-static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
- SDOperand Copy;
- SDOperand Chain = Op.getOperand(0);
- SDOperand R0 = DAG.getRegister(ARM::R0, MVT::i32);
- SDOperand R1 = DAG.getRegister(ARM::R1, MVT::i32);
-
- switch(Op.getNumOperands()) {
- default:
- assert(0 && "Do not know how to return this many arguments!");
- abort();
- case 1: {
- SDOperand LR = DAG.getRegister(ARM::R14, MVT::i32);
- return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
+bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getValue();
+ if (Val >= 0 && Val < 0x1000) { // 12 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+ ARM_AM::no_shift),
+ MVT::i32);
+ return true;
+ }
}
- case 3: {
- SDOperand Val = Op.getOperand(1);
- assert(Val.getValueType() == MVT::i32 ||
- Val.getValueType() == MVT::f32 ||
- Val.getValueType() == MVT::f64);
- if (Val.getValueType() == MVT::f64) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand Ops[] = {Chain, R0, R1, Val};
- Copy = DAG.getNode(ARMISD::FMRRD, VTs, Ops, 4);
+ Offset = N;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+ unsigned ShAmt = 0;
+ if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant; if not, we can't
+    // fold it.
+ if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShAmt = Sh->getValue();
+ Offset = N.getOperand(0);
} else {
- if (Val.getValueType() == MVT::f32)
- Val = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Val);
- Copy = DAG.getCopyToReg(Chain, R0, Val, SDOperand());
- }
-
- if (DAG.getMachineFunction().liveout_empty()) {
- DAG.getMachineFunction().addLiveOut(ARM::R0);
- if (Val.getValueType() == MVT::f64)
- DAG.getMachineFunction().addLiveOut(ARM::R1);
- }
- break;
- }
- case 5:
- Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
- Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
- // If we haven't noted the R0+R1 are live out, do so now.
- if (DAG.getMachineFunction().liveout_empty()) {
- DAG.getMachineFunction().addLiveOut(ARM::R0);
- DAG.getMachineFunction().addLiveOut(ARM::R1);
+ ShOpcVal = ARM_AM::no_shift;
}
- break;
}
- //We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag
- return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
}
-static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType PtrVT = Op.getValueType();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- Constant *C = CP->getConstVal();
- SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
- return CPI;
-}
-
-SDOperand LegalizeImmediate(uint32_t immediate, SelectionDAG &DAG,
- bool canReturnConstant){
- SDOperand Shift = DAG.getTargetConstant(0, MVT::i32);
- SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32);
- std::vector<unsigned>immediatePieces = splitImmediate(immediate);
- if (immediatePieces.size()>1){
- unsigned movInst = ARM::MOV;
- unsigned orInst = ARM::ORR;
- SDNode *node;
- //try mvn
- std::vector<unsigned>immediateNegPieces = splitImmediate(~immediate);
- if (immediatePieces.size() > immediateNegPieces.size()) {
- //use mvn/eor
- movInst = ARM::MVN;
- orInst = ARM::EOR;
- immediatePieces = immediateNegPieces;
- }
- SDOperand n = DAG.getTargetConstant(immediatePieces[0], MVT::i32);
- node = DAG.getTargetNode(movInst, MVT::i32, n, Shift, ShiftType);
- std::vector<unsigned>::iterator it;
- for (it=immediatePieces.begin()+1; it != immediatePieces.end(); ++it){
- n = DAG.getTargetConstant(*it, MVT::i32);
- SDOperand ops[] = {SDOperand(node, 0), n, Shift, ShiftType};
- node = DAG.getTargetNode(orInst, MVT::i32, ops, 4);
+bool ARMDAGToDAGISel::SelectAddrMode3(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset,
+ SDOperand &Opc) {
+ if (N.getOpcode() == ISD::SUB) {
+    // X - C is canonicalized to X + -C; no need to handle it here.
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
}
- return SDOperand(node, 0);
- } else {
- if (canReturnConstant)
- return DAG.getTargetConstant(immediate, MVT::i32);
- else {
- SDOperand n = DAG.getTargetConstant(immediate, MVT::i32);
- SDNode *node = DAG.getTargetNode(ARM::MOV, MVT::i32, n, Shift,
- ShiftType);
- return SDOperand(node, 0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32);
+ return true;
+ }
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC >= 0 && RHSC < 256) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, RHSC),
+ MVT::i32);
+ return true;
+ } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, -RHSC),
+ MVT::i32);
+ return true;
}
}
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+ return true;
}
-static SDOperand LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
- MVT::ValueType VT = Op.getValueType();
- SDOperand Shift = DAG.getTargetConstant(0, MVT::i32);
- SDOperand ShiftType = DAG.getTargetConstant(ARMShift::LSL, MVT::i32);
- SDNode *node;
- switch (VT) {
- default: assert(0 && "VT!=f32 && VT!=f64");
- case MVT::f32: {
- float val = cast<ConstantFPSDNode>(Op)->getValue();
- uint32_t i32_val = FloatToBits(val);
- SDOperand c = LegalizeImmediate(i32_val, DAG, false);
- node = DAG.getTargetNode(ARM::FMSR, MVT::f32, c);
- break;
- }
- case MVT::f64: {
- double val = cast<ConstantFPSDNode>(Op)->getValue();
- uint64_t i64_val = DoubleToBits(val);
- SDOperand hi = LegalizeImmediate(Hi_32(i64_val), DAG, false);
- SDOperand lo = LegalizeImmediate(Lo_32(i64_val), DAG, false);
- node = DAG.getTargetNode(ARM::FMDRR, MVT::f64, lo, hi);
- break;
- }
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Opc) {
+ unsigned Opcode = Op.getOpcode();
+ ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+ ? cast<LoadSDNode>(Op)->getAddressingMode()
+ : cast<StoreSDNode>(Op)->getAddressingMode();
+ ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+ ? ARM_AM::add : ARM_AM::sub;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ int Val = (int)C->getValue();
+ if (Val >= 0 && Val < 256) {
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+ return true;
+ }
}
- return SDOperand(node, 0);
-}
-
-static SDOperand LowerGlobalAddress(SDOperand Op,
- SelectionDAG &DAG) {
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int alignment = 2;
- SDOperand CPAddr = DAG.getConstantPool(GV, MVT::i32, alignment);
- return DAG.getLoad(MVT::i32, DAG.getEntryNode(), CPAddr, NULL, 0);
-}
-static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
- unsigned VarArgsFrameIndex) {
- // vastart just stores the address of the VarArgsFrameIndex slot into the
- // memory location argument.
- MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
- return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
- SV->getOffset());
+ Offset = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+ return true;
}
-static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
- int &VarArgsFrameIndex) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SSARegMap *RegMap = MF.getSSARegMap();
- unsigned NumArgs = Op.Val->getNumValues()-1;
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- static const unsigned REGS[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3
- };
-
- std::vector<MVT::ValueType> Types(Op.Val->value_begin(), Op.Val->value_end() - 1);
- ArgumentLayout Layout(Types);
-
- std::vector<SDOperand> ArgValues;
- for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) {
- MVT::ValueType VT = Types[ArgNo];
- SDOperand Value;
- if (Layout.isRegister(ArgNo)) {
- assert(VT == MVT::i32 || VT == MVT::f32 || VT == MVT::f64);
- unsigned RegNum = Layout.getRegisterNum(ArgNo);
- unsigned Reg1 = REGS[RegNum];
- unsigned VReg1 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
- SDOperand Value1 = DAG.getCopyFromReg(Root, VReg1, MVT::i32);
- MF.addLiveIn(Reg1, VReg1);
- if (VT == MVT::f64) {
- unsigned Reg2 = REGS[RegNum + 1];
- unsigned VReg2 = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
- SDOperand Value2 = DAG.getCopyFromReg(Root, VReg2, MVT::i32);
- MF.addLiveIn(Reg2, VReg2);
- Value = DAG.getNode(ARMISD::FMDRR, MVT::f64, Value1, Value2);
- } else {
- Value = Value1;
- if (VT == MVT::f32)
- Value = DAG.getNode(ISD::BIT_CONVERT, VT, Value);
- }
- } else {
- // If the argument is actually used, emit a load from the right stack
- // slot.
- if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
- unsigned Offset = Layout.getOffset(ArgNo);
- unsigned Size = MVT::getSizeInBits(VT)/8;
- int FI = MFI->CreateFixedObject(Size, Offset);
- SDOperand FIN = DAG.getFrameIndex(FI, VT);
- Value = DAG.getLoad(VT, Root, FIN, NULL, 0);
- } else {
- Value = DAG.getNode(ISD::UNDEF, VT);
- }
+bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset) {
+ if (N.getOpcode() != ISD::ADD) {
+ Base = N;
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ } else if (N.getOpcode() == ARMISD::Wrapper) {
+ Base = N.getOperand(0);
}
- ArgValues.push_back(Value);
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
}
-
- unsigned NextRegNum = Layout.lastRegNum() + 1;
-
- if (isVarArg) {
- //If this function is vararg we must store the remaing
- //registers so that they can be acessed with va_start
- VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8,
- -16 + NextRegNum * 4);
-
- SmallVector<SDOperand, 4> MemOps;
- for (unsigned RegNo = NextRegNum; RegNo < 4; ++RegNo) {
- int RegOffset = - (4 - RegNo) * 4;
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(MVT::i32)/8,
- RegOffset);
- SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
-
- unsigned VReg = RegMap->createVirtualRegister(&ARM::IntRegsRegClass);
- MF.addLiveIn(REGS[RegNo], VReg);
-
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
+
+ // If the RHS is +/- imm8, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4.
+ RHSC >>= 2;
+ if (RHSC >= 0 && RHSC < 256) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, RHSC),
+ MVT::i32);
+ return true;
+ } else if (RHSC < 0 && RHSC > -256) { // note -256 itself isn't allowed.
+ Base = N.getOperand(0);
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::sub,-RHSC),
+ MVT::i32);
+ return true;
+ }
}
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
}
-
- ArgValues.push_back(Root);
-
- // Return the new list of results.
- std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
- Op.Val->value_end());
- return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
-}
-
-static SDOperand GetCMP(ISD::CondCode CC, SDOperand LHS, SDOperand RHS,
- SelectionDAG &DAG) {
- MVT::ValueType vt = LHS.getValueType();
- assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64);
-
- SDOperand Cmp = DAG.getNode(ARMISD::CMP, MVT::Flag, LHS, RHS);
-
- if (vt != MVT::i32)
- Cmp = DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp);
- return Cmp;
-}
-
-static std::vector<SDOperand> GetARMCC(ISD::CondCode CC, MVT::ValueType vt,
- SelectionDAG &DAG) {
- assert(vt == MVT::i32 || vt == MVT::f32 || vt == MVT::f64);
- std::vector<unsigned> vcc;
- if (vt == MVT::i32)
- vcc = DAGIntCCToARMCC(CC);
- else
- vcc = DAGFPCCToARMCC(CC);
-
- std::vector<unsigned>::iterator it;
- std::vector<SDOperand> result;
- for( it = vcc.begin(); it != vcc.end(); it++ )
- result.push_back(DAG.getConstant(*it,MVT::i32));
- return result;
-}
-
-static bool isUInt8Immediate(uint32_t x) {
- return x < (1 << 8);
-}
-
-static uint32_t rotateL(uint32_t x) {
- uint32_t bit31 = (x & (1 << 31)) >> 31;
- uint32_t t = x << 1;
- return t | bit31;
+
+ Base = N;
+ Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+ MVT::i32);
+ return true;
}
-static bool isRotInt8Immediate(uint32_t x) {
- int r;
- for (r = 0; r < 16; r++) {
- if (isUInt8Immediate(x))
- return true;
- x = rotateL(rotateL(x));
+bool ARMDAGToDAGISel::SelectAddrModePC(SDOperand Op, SDOperand N,
+ SDOperand &Offset, SDOperand &Label) {
+ if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
+ Offset = N.getOperand(0);
+ SDOperand N1 = N.getOperand(1);
+ Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getValue(),
+ MVT::i32);
+ return true;
}
return false;
}
-static void LowerCMP(SDOperand &Cmp, std::vector<SDOperand> &ARMCC,
- SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
- SelectionDAG &DAG) {
- MVT::ValueType vt = LHS.getValueType();
- if (vt == MVT::i32) {
- assert(!isa<ConstantSDNode>(LHS));
- if (ConstantSDNode *SD_C = dyn_cast<ConstantSDNode>(RHS.Val)) {
- uint32_t C = SD_C->getValue();
-
- uint32_t NC;
- switch(CC) {
- default:
- NC = C; break;
- case ISD::SETLT:
- case ISD::SETULT:
- case ISD::SETGE:
- case ISD::SETUGE:
- NC = C - 1; break;
- case ISD::SETLE:
- case ISD::SETULE:
- case ISD::SETGT:
- case ISD::SETUGT:
- NC = C + 1; break;
- }
-
- ISD::CondCode NCC;
- switch(CC) {
- default:
- NCC = CC; break;
- case ISD::SETLT:
- NCC = ISD::SETLE; break;
- case ISD::SETULT:
- NCC = ISD::SETULE; break;
- case ISD::SETGE:
- NCC = ISD::SETGT; break;
- case ISD::SETUGE:
- NCC = ISD::SETUGT; break;
- case ISD::SETLE:
- NCC = ISD::SETLT; break;
- case ISD::SETULE:
- NCC = ISD::SETULT; break;
- case ISD::SETGT:
- NCC = ISD::SETGE; break;
- case ISD::SETUGT:
- NCC = ISD::SETUGE; break;
- }
-
- if (!isRotInt8Immediate(C) && isRotInt8Immediate(NC)) {
- RHS = DAG.getConstant(NC, MVT::i32);
- CC = NCC;
- }
- }
- }
- Cmp = GetCMP(CC, LHS, RHS, DAG);
- ARMCC = GetARMCC(CC, vt, DAG);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset){
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+ return true;
}
-static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
- SDOperand LHS = Op.getOperand(0);
- SDOperand RHS = Op.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDOperand TrueVal = Op.getOperand(2);
- SDOperand FalseVal = Op.getOperand(3);
- SDOperand Cmp;
- std::vector<SDOperand> ARMCC;
- LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG);
-
- SDOperand Aux = FalseVal;
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
- std::vector<SDOperand>::iterator it;
- for (it = ARMCC.begin(); it != ARMCC.end(); ++it){
- SDOperand Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1);
- SDOperand Ops[] = {TrueVal, Aux, *it, Flag};
- Aux = DAG.getNode(ARMISD::SELECT, VTs, Ops, 4);
+static bool SelectThumbAddrModeRI5(SDOperand N, unsigned Scale,
+ TargetLowering &TLI, SelectionDAG *CurDAG,
+ SDOperand &Base, SDOperand &Offset) {
+ if (N.getOpcode() == ISD::FrameIndex)
+ return false;
+
+ if (N.getOpcode() != ISD::ADD) {
+ Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
}
- return Aux;
-}
-static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG) {
- SDOperand Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDOperand LHS = Op.getOperand(2);
- SDOperand RHS = Op.getOperand(3);
- SDOperand Dest = Op.getOperand(4);
- SDOperand Cmp;
- std::vector<SDOperand> ARMCC;
- LowerCMP(Cmp, ARMCC, LHS, RHS, CC, DAG);
-
- SDOperand Aux = Chain;
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag);
- std::vector<SDOperand>::iterator it;
- for (it = ARMCC.begin(); it != ARMCC.end(); it++){
- SDOperand Flag = it == ARMCC.begin() ? Cmp : Aux.getValue(1);
- SDOperand Ops[] = {Aux, Dest, *it, Flag};
- Aux = DAG.getNode(ARMISD::BR, VTs, Ops, 4);
+ // If the RHS is + imm5 * scale, fold into addr mode.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied.
+ RHSC /= Scale;
+ if (RHSC >= 0 && RHSC < 32) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
}
- return Aux;
-}
-static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
- SDOperand IntVal = Op.getOperand(0);
- assert(IntVal.getValueType() == MVT::i32);
- MVT::ValueType vt = Op.getValueType();
- assert(vt == MVT::f32 ||
- vt == MVT::f64);
-
- SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal);
- ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FSITOS : ARMISD::FSITOD;
- return DAG.getNode(op, vt, Tmp);
-}
-
-static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getValueType() == MVT::i32);
- SDOperand FloatVal = Op.getOperand(0);
- MVT::ValueType vt = FloatVal.getValueType();
- assert(vt == MVT::f32 || vt == MVT::f64);
-
- ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FTOSIS : ARMISD::FTOSID;
- SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal);
- return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp);
-}
-
-static SDOperand LowerUINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
- SDOperand IntVal = Op.getOperand(0);
- assert(IntVal.getValueType() == MVT::i32);
- MVT::ValueType vt = Op.getValueType();
- assert(vt == MVT::f32 ||
- vt == MVT::f64);
-
- SDOperand Tmp = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, IntVal);
- ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FUITOS : ARMISD::FUITOD;
- return DAG.getNode(op, vt, Tmp);
-}
-
-static SDOperand LowerFP_TO_UINT(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getValueType() == MVT::i32);
- SDOperand FloatVal = Op.getOperand(0);
- MVT::ValueType vt = FloatVal.getValueType();
- assert(vt == MVT::f32 || vt == MVT::f64);
-
- ARMISD::NodeType op = vt == MVT::f32 ? ARMISD::FTOUIS : ARMISD::FTOUID;
- SDOperand Tmp = DAG.getNode(op, MVT::f32, FloatVal);
- return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Tmp);
-}
-
-SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
- switch (Op.getOpcode()) {
- default:
- assert(0 && "Should not custom lower this!");
- abort();
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG);
- case ISD::ConstantFP:
- return LowerConstantFP(Op, DAG);
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- case ISD::FP_TO_SINT:
- return LowerFP_TO_SINT(Op, DAG);
- case ISD::SINT_TO_FP:
- return LowerSINT_TO_FP(Op, DAG);
- case ISD::FP_TO_UINT:
- return LowerFP_TO_UINT(Op, DAG);
- case ISD::UINT_TO_FP:
- return LowerUINT_TO_FP(Op, DAG);
- case ISD::FORMAL_ARGUMENTS:
- return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
- case ISD::CALL:
- return LowerCALL(Op, DAG);
- case ISD::RET:
- return LowerRET(Op, DAG);
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG);
- case ISD::BR_CC:
- return LowerBR_CC(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG, VarArgsFrameIndex);
- }
+ return false;
}
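
A sketch of the Thumb rule just implemented, again as a hypothetical
standalone predicate (fitsInThumbRI5 is not part of this patch): the byte
offset must be a non-negative multiple of the access size whose scaled value
fits in an unsigned 5-bit field.

static bool fitsInThumbRI5(int ByteOffset, int Scale) {
  if (ByteOffset % Scale != 0) return false; // must be a multiple of Scale
  int Imm = ByteOffset / Scale;              // field counts units of Scale
  return Imm >= 0 && Imm < 32;               // unsigned imm5: [0, 31]
}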
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===--------------------------------------------------------------------===//
-/// ARMDAGToDAGISel - ARM specific code to select ARM machine
-/// instructions for SelectionDAG operations.
-///
-namespace {
-class ARMDAGToDAGISel : public SelectionDAGISel {
- ARMTargetLowering Lowering;
-
-public:
- ARMDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(Lowering), Lowering(TM) {
- }
-
- SDNode *Select(SDOperand Op);
- virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);
- bool SelectAddrMode1(SDOperand Op, SDOperand N, SDOperand &Arg,
- SDOperand &Shift, SDOperand &ShiftType);
- bool SelectAddrMode1a(SDOperand Op, SDOperand N, SDOperand &Arg,
- SDOperand &Shift, SDOperand &ShiftType);
- bool SelectAddrMode2(SDOperand Op, SDOperand N, SDOperand &Arg,
- SDOperand &Offset);
- bool SelectAddrMode5(SDOperand Op, SDOperand N, SDOperand &Arg,
- SDOperand &Offset);
-
- // Include the pieces autogenerated from the target description.
-#include "ARMGenDAGISel.inc"
-};
-
-void ARMDAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
- DEBUG(BB->dump());
-
- DAG.setRoot(SelectRoot(DAG.getRoot()));
- DAG.RemoveDeadNodes();
-
- ScheduleAndEmitDAG(DAG);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_1(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset){
+ return SelectThumbAddrModeRI5(N, 1, TLI, CurDAG, Base, Offset);
}
-static bool isInt12Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
- return false;
-
- int32_t t = cast<ConstantSDNode>(N)->getValue();
- int max = 1<<12;
- int min = -max;
- if (t > min && t < max) {
- Imm = t;
- return true;
- }
- else
- return false;
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_2(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset){
+ return SelectThumbAddrModeRI5(N, 2, TLI, CurDAG, Base, Offset);
}
-static bool isInt12Immediate(SDOperand Op, short &Imm) {
- return isInt12Immediate(Op.Val, Imm);
+bool ARMDAGToDAGISel::SelectThumbAddrModeRI5_4(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset){
+ return SelectThumbAddrModeRI5(N, 4, TLI, CurDAG, Base, Offset);
}
-bool ARMDAGToDAGISel::SelectAddrMode1(SDOperand Op,
- SDOperand N,
- SDOperand &Arg,
- SDOperand &Shift,
- SDOperand &ShiftType) {
- switch(N.getOpcode()) {
- case ISD::Constant: {
- uint32_t val = cast<ConstantSDNode>(N)->getValue();
- Shift = CurDAG->getTargetConstant(0, MVT::i32);
- ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
- Arg = LegalizeImmediate(val, *CurDAG, true);
- return true;
- }
-
- case ISD::SRA:
- Arg = N.getOperand(0);
- Shift = N.getOperand(1);
- ShiftType = CurDAG->getTargetConstant(ARMShift::ASR, MVT::i32);
- return true;
- case ISD::SRL:
- Arg = N.getOperand(0);
- Shift = N.getOperand(1);
- ShiftType = CurDAG->getTargetConstant(ARMShift::LSR, MVT::i32);
- return true;
- case ISD::SHL:
- Arg = N.getOperand(0);
- Shift = N.getOperand(1);
- ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDOperand Op, SDOperand N,
+ SDOperand &Base, SDOperand &Offset) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-
- Arg = N;
- Shift = CurDAG->getTargetConstant(0, MVT::i32);
- ShiftType = CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32);
- return true;
+
+ return false;
}
-bool ARMDAGToDAGISel::SelectAddrMode2(SDOperand Op, SDOperand N,
- SDOperand &Arg, SDOperand &Offset) {
- //TODO: complete and cleanup!
- SDOperand Zero = CurDAG->getTargetConstant(0, MVT::i32);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
- Arg = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = Zero;
- return true;
- }
- if (N.getOpcode() == ISD::ADD) {
- short imm = 0;
- if (isInt12Immediate(N.getOperand(1), imm)) {
- Offset = CurDAG->getTargetConstant(imm, MVT::i32);
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
- Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
- } else {
- Arg = N.getOperand(0);
- }
- return true; // [r+i]
- }
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDOperand Op,
+ SDOperand N,
+ SDOperand &BaseReg,
+ SDOperand &ShReg,
+ SDOperand &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match the base-register-only case; that is matched to a separate,
+  // lower-complexity pattern with an explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ ShReg = CurDAG->getRegister(0, MVT::i32);
+ ShImmVal = RHS->getValue() & 31;
+ } else {
+ ShReg = N.getOperand(1);
}
- Offset = Zero;
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
- Arg = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
- Arg = N;
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+ MVT::i32);
return true;
}
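
For "add r0, r1, r2, lsl #2" the routine above matches the shifter operand
with BaseReg = r2, ShReg = the null register, and Opc carrying {lsl, 2}. A
hedged sketch of one plausible operand packing (packSOReg and ShiftKind are
hypothetical names; the authoritative encoding is ARM_AM::getSORegOpc in
ARMAddressingModes.h):

enum ShiftKind { LSL, LSR, ASR, ROR, RRX };
static unsigned packSOReg(ShiftKind K, unsigned Imm5) {
  return unsigned(K) | (Imm5 << 3);  // low bits: shift kind; upper: imm5
}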
-bool ARMDAGToDAGISel::SelectAddrMode5(SDOperand Op,
- SDOperand N, SDOperand &Arg,
- SDOperand &Offset) {
- //TODO: detect offset
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- Arg = N;
- return true;
-}
SDNode *ARMDAGToDAGISel::Select(SDOperand Op) {
SDNode *N = Op.Val;
+ unsigned Opcode = N->getOpcode();
+
+ if (Opcode >= ISD::BUILTIN_OP_END && Opcode < ARMISD::FIRST_NUMBER)
+ return NULL; // Already selected.
switch (N->getOpcode()) {
- default:
- return SelectCode(Op);
+ default: break;
+ case ISD::Constant: {
+ unsigned Val = cast<ConstantSDNode>(N)->getValue();
+ bool UseCP = true;
+ if (Subtarget->isThumb())
+ UseCP = (Val > 255 && // MOV
+ ~Val > 255 && // MOV + MVN
+ !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL
+ else
+ UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
+ ARM_AM::getSOImmVal(~Val) == -1 && // MVN
+ !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs.
+ if (UseCP) {
+ SDOperand CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val),
+ TLI.getPointerTy());
+ SDOperand Ops[] = {
+ CPIdx,
+ CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getEntryNode()
+ };
+ SDNode *ResNode =
+ CurDAG->getTargetNode(ARM::LDR, MVT::i32, MVT::Other, Ops, 4);
+ ReplaceUses(Op, SDOperand(ResNode, 0));
+ return NULL;
+ }
+
+ // Other cases are autogenerated.
break;
+ }
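
The UseCP decision above asks whether the constant can be materialized in at
most two instructions before falling back to a constant-pool load. A sketch
of the ARM-mode single-MOV test, i.e. an 8-bit value rotated right by an even
amount (isSOImmSketch is a hypothetical stand-in for ARM_AM::getSOImmVal):

#include <cstdint>
static bool isSOImmSketch(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    // Rotating V left by R undoes a rotate-right-by-R of an 8-bit value.
    uint32_t Rot = R ? ((V << R) | (V >> (32 - R))) : V;
    if (Rot < 256) return true;
  }
  return false;
}
// E.g. 0xFF000000 passes (0xFF ror 8). A constant failing this, the MVN
// form, and the two-part split is loaded with the LDR sequence above.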
case ISD::FrameIndex: {
+    // Selects to ADDri FI, 0, which in turn will become ADDri SP, imm.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDOperand Ops[] = {CurDAG->getTargetFrameIndex(FI, MVT::i32),
- CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(ARMShift::LSL, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::tADDrSPi : ARM::ADDri;
+ SDOperand TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, TFI,
+ CurDAG->getTargetConstant(0, MVT::i32));
+ }
+ case ISD::MUL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RHSV = C->getValue();
+ if (!RHSV) break;
+ if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
+ SDOperand V = Op.getOperand(0);
+ AddToISelQueue(V);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1));
+ SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32)
+ };
+ return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 4);
+ }
+ if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
+ SDOperand V = Op.getOperand(0);
+ AddToISelQueue(V);
+ unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1));
+ SDOperand Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getTargetConstant(ShImm, MVT::i32)
+ };
+ return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 4);
+ }
+ }
+ break;
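
In concrete terms, the two strength reductions above turn x*9 into
"add r0, r0, r0, lsl #3" and x*7 into "rsb r0, r0, r0, lsl #3" (RSB
subtracts its first operand from the shifted one). The same identities in
plain C++, as an editorial aside with hypothetical names:

static unsigned mulPow2Plus1(unsigned X, unsigned N) {
  return X + (X << N);   // X * (2^N + 1), e.g. N = 3 gives X * 9
}
static unsigned mulPow2Minus1(unsigned X, unsigned N) {
  return (X << N) - X;   // X * (2^N - 1), e.g. N = 3 gives X * 7
}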
+ case ARMISD::FMRRD:
+ AddToISelQueue(Op.getOperand(0));
+ return CurDAG->getTargetNode(ARM::FMRRD, MVT::i32, MVT::i32,
+ Op.getOperand(0));
+ case ARMISD::MULHILOU:
+ AddToISelQueue(Op.getOperand(0));
+ AddToISelQueue(Op.getOperand(1));
+ return CurDAG->getTargetNode(ARM::UMULL, MVT::i32, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ case ARMISD::MULHILOS:
+ AddToISelQueue(Op.getOperand(0));
+ AddToISelQueue(Op.getOperand(1));
+ return CurDAG->getTargetNode(ARM::SMULL, MVT::i32, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ MVT::ValueType LoadedVT = LD->getLoadedVT();
+ if (AM != ISD::UNINDEXED) {
+ SDOperand Offset, AMOpc;
+ bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+ unsigned Opcode = 0;
+ bool Match = false;
+ if (LoadedVT == MVT::i32 &&
+ SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+ Match = true;
+ } else if (LoadedVT == MVT::i16 &&
+ SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+ ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+ : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+ } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+ if (LD->getExtensionType() == ISD::SEXTLOAD) {
+ if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+ }
+ } else {
+ if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+ Match = true;
+ Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+ }
+ }
+ }
- return CurDAG->SelectNodeTo(N, ARM::ADD, MVT::i32, Ops,
- sizeof(Ops)/sizeof(SDOperand));
+ if (Match) {
+ SDOperand Chain = LD->getChain();
+ SDOperand Base = LD->getBasePtr();
+ AddToISelQueue(Chain);
+ AddToISelQueue(Base);
+ AddToISelQueue(Offset);
+ SDOperand Ops[] = { Base, Offset, AMOpc, Chain };
+ return CurDAG->getTargetNode(Opcode, MVT::i32, MVT::i32,
+ MVT::Other, Ops, 4);
+ }
+ }
+ // Other cases are autogenerated.
break;
}
}
-}
-} // end anonymous namespace
+ return SelectCode(Op);
+}
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createARMISelDag(TargetMachine &TM) {
+FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
return new ARMDAGToDAGISel(TM);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 0000000000..f4cba4b336
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,1414 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/ADT/VectorExtras.h"
+using namespace llvm;
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM), ARMPCLabelIndex(0) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+
+ // Uses VFP for Thumb libfuncs if available.
+ if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ // Single-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+ setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+ setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+ setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+ // Double-precision floating-point arithmetic.
+ setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+ setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+ setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+ setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
+ // Single-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
+ setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
+ setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
+ setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
+ setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
+ setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
+ setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
+
+ // Double-precision comparisons.
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
+ setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
+ setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
+ setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
+ setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
+ setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
+ setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+
+ // Conversions between floating types.
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc: e.g.
+ // __floatunsidf vs. __floatunssidfvfp.
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+ }
+
+ addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+ addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ }
+
+ // ARM does not have f32 extending load.
+ setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // ARM supports all 4 flavors of integer indexed load / store.
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i1, Legal);
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i1, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ }
+
+ // i64 operation support.
+ if (Subtarget->isThumb()) {
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ } else {
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Custom);
+ if (!Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Custom);
+ }
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL, MVT::i64, Custom);
+ setOperationAction(ISD::SRA, MVT::i64, Custom);
+
+ // ARM does not have ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ if (!Subtarget->hasV5TOps())
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+
+ // These are expanded into libcalls.
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ // Support label based line numbers.
+ setOperationAction(ISD::LOCATION, MVT::Other, Expand);
+ setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
+ // FIXME - use subtarget debug flags
+ if (Subtarget->isDarwin())
+ setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
+
+ setOperationAction(ISD::RET, MVT::Other, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+  // Expand mem operations generically.
+ setOperationAction(ISD::MEMSET , MVT::Other, Expand);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
+ setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VASTART , MVT::Other, Expand);
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+
+ if (!Subtarget->hasV6Ops()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ }
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb())
+ // Turn f64->i64 into FMRRD iff target supports vfp2.
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+
+ setOperationAction(ISD::SETCC , MVT::i32, Expand);
+ setOperationAction(ISD::SETCC , MVT::f32, Expand);
+ setOperationAction(ISD::SETCC , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT , MVT::i32, Expand);
+ setOperationAction(ISD::SELECT , MVT::f32, Expand);
+ setOperationAction(ISD::SELECT , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ setOperationAction(ISD::BRCOND , MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC , MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC , MVT::f32, Custom);
+ setOperationAction(ISD::BR_CC , MVT::f64, Custom);
+ setOperationAction(ISD::BR_JT , MVT::Other, Custom);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+ // FP Constants can't be immediates.
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+
+  // We don't support sin/cos/fmod or copysign.
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+  // int <-> fp conversions are custom expanded into bit_convert + ARMISD ops.
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ setStackPointerRegisterToSaveRestore(ARM::SP);
+
+ setSchedulingPreference(SchedulingForRegPressure);
+ computeRegisterProperties();
+}
+
+
+const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case ARMISD::Wrapper: return "ARMISD::Wrapper";
+ case ARMISD::WrapperCall: return "ARMISD::WrapperCall";
+ case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
+ case ARMISD::CALL: return "ARMISD::CALL";
+ case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tCALL: return "ARMISD::tCALL";
+ case ARMISD::BRCOND: return "ARMISD::BRCOND";
+ case ARMISD::BR_JT: return "ARMISD::BR_JT";
+ case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::CMP: return "ARMISD::CMP";
+ case ARMISD::CMPFP: return "ARMISD::CMPFP";
+ case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+ case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CNEG: return "ARMISD::CNEG";
+
+ case ARMISD::FTOSI: return "ARMISD::FTOSI";
+ case ARMISD::FTOUI: return "ARMISD::FTOUI";
+ case ARMISD::SITOF: return "ARMISD::SITOF";
+ case ARMISD::UITOF: return "ARMISD::UITOF";
+ case ARMISD::MULHILOU: return "ARMISD::MULHILOU";
+ case ARMISD::MULHILOS: return "ARMISD::MULHILOS";
+
+ case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
+ case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
+ case ARMISD::RRX: return "ARMISD::RRX";
+
+ case ARMISD::FMRRD: return "ARMISD::FMRRD";
+ case ARMISD::FMDRR: return "ARMISD::FMDRR";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+
+/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
+static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: assert(0 && "Unknown condition code!");
+ case ISD::SETNE: return ARMCC::NE;
+ case ISD::SETEQ: return ARMCC::EQ;
+ case ISD::SETGT: return ARMCC::GT;
+ case ISD::SETGE: return ARMCC::GE;
+ case ISD::SETLT: return ARMCC::LT;
+ case ISD::SETLE: return ARMCC::LE;
+ case ISD::SETUGT: return ARMCC::HI;
+ case ISD::SETUGE: return ARMCC::HS;
+ case ISD::SETULT: return ARMCC::LO;
+ case ISD::SETULE: return ARMCC::LS;
+ }
+}
+
+/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It
+/// returns true if the operands should be inverted to form the proper
+/// comparison.
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ ARMCC::CondCodes &CondCode2) {
+ bool Invert = false;
+ CondCode2 = ARMCC::AL;
+ switch (CC) {
+ default: assert(0 && "Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETGT:
+ case ISD::SETOGT: CondCode = ARMCC::GT; break;
+ case ISD::SETGE:
+ case ISD::SETOGE: CondCode = ARMCC::GE; break;
+ case ISD::SETOLT: CondCode = ARMCC::MI; break;
+ case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break;
+ case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETO: CondCode = ARMCC::VC; break;
+ case ISD::SETUO: CondCode = ARMCC::VS; break;
+ case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUGT: CondCode = ARMCC::HI; break;
+ case ISD::SETUGE: CondCode = ARMCC::PL; break;
+ case ISD::SETLT:
+ case ISD::SETULT: CondCode = ARMCC::LT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: CondCode = ARMCC::LE; break;
+ case ISD::SETNE:
+ case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ }
+ return Invert;
+}
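
Some unordered predicates need two ARM conditions. SETONE ("ordered and not
equal"), for instance, maps to MI with CondCode2 = GT: the value is either
less than or greater than, and each leg gets its own conditional op. A
pseudo-C++ sketch of how the two-condition case plays out (it mirrors
LowerSELECT_CC below; this is not a literal API):

// select(a ONE b, t, f) after fcmp + fmstat:
//   r = f;
//   if (a < b) r = t;   // first CMOV, condition MI
//   if (a > b) r = t;   // second CMOV, condition GT (the CondCode2 leg)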
+
+static void
+HowToPassArgument(MVT::ValueType ObjectVT,
+ unsigned NumGPRs, unsigned &ObjSize, unsigned &ObjGPRs) {
+ ObjSize = 0;
+ ObjGPRs = 0;
+
+ switch (ObjectVT) {
+ default: assert(0 && "Unhandled argument type!");
+ case MVT::i32:
+ case MVT::f32:
+ if (NumGPRs < 4)
+ ObjGPRs = 1;
+ else
+ ObjSize = 4;
+ break;
+ case MVT::i64:
+ case MVT::f64:
+ if (NumGPRs < 3)
+ ObjGPRs = 2;
+ else if (NumGPRs == 3) {
+ ObjGPRs = 1;
+ ObjSize = 4;
+ } else
+ ObjSize = 8;
+ }
+}
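
A worked example of the rule above, as an editorial aside: for a call
f(i32 a, f64 b, i32 c, f64 d), a lands in r0, b in the pair r1+r2, c in r3,
and d entirely on the stack (ObjSize = 8). When exactly one GPR remains and
a 64-bit value arrives, it splits across register and memory, which is the
ObjGPRs == 1, ObjSize == 4 case:

// NumGPRs before each argument: 0, 1, 3, 4 (hypothetical trace)
//   a: i32 -> r0          b: f64 -> r1, r2
//   c: i32 -> r3          d: f64 -> 8 bytes of stack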
+
+// This transforms an ISD::CALL node into a
+// callseq_start <- ARMISD::CALL <- callseq_end
+// chain.
+SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType RetVT = Op.Val->getValueType(0);
+ SDOperand Chain = Op.getOperand(0);
+ unsigned CallConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::CSRet ||
+ CallConv == CallingConv::Fast) && "unknown calling convention");
+ SDOperand Callee = Op.getOperand(4);
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2;
+ unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
+ unsigned NumGPRs = 0; // GPRs used for parameter passing.
+
+ // Count how many bytes are to be pushed on the stack.
+ unsigned NumBytes = 0;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i < NumOps; ++i) {
+ unsigned ObjSize = 0;
+ unsigned ObjGPRs = 0;
+ MVT::ValueType ObjectVT = Op.getOperand(5+2*i).getValueType();
+ HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs);
+ NumBytes += ObjSize;
+ NumGPRs += ObjGPRs;
+ }
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getConstant(NumBytes, MVT::i32));
+
+ SDOperand StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ NumGPRs = 0;
+ std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
+ std::vector<SDOperand> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDOperand Arg = Op.getOperand(5+2*i);
+ MVT::ValueType ArgVT = Arg.getValueType();
+
+ unsigned ObjSize = 0;
+ unsigned ObjGPRs = 0;
+ HowToPassArgument(ArgVT, NumGPRs, ObjSize, ObjGPRs);
+ if (ObjGPRs > 0) {
+ switch (ArgVT) {
+ default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i32:
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg));
+ break;
+ case MVT::f32:
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs],
+ DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg)));
+ break;
+ case MVT::i64: {
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+ DAG.getConstant(0, getPointerTy()));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg,
+ DAG.getConstant(1, getPointerTy()));
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo));
+ if (ObjGPRs == 2)
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi));
+ else {
+          SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Hi, PtrOff, NULL, 0));
+ }
+ break;
+ }
+ case MVT::f64: {
+ SDOperand Cvt = DAG.getNode(ARMISD::FMRRD,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ &Arg, 1);
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt));
+ if (ObjGPRs == 2)
+ RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1],
+ Cvt.getValue(1)));
+ else {
+          SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Cvt.getValue(1), PtrOff,
+ NULL, 0));
+ }
+ break;
+ }
+ }
+ } else {
+ assert(ObjSize != 0);
+ SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
+ }
+
+ NumGPRs += ObjGPRs;
+ ArgOffset += ObjSize;
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDOperand InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common; every
+  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+ bool isDirect = false;
+ bool isARMFunc = false;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ isDirect = true;
+ bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage());
+ bool isStub = (isExt && Subtarget->isDarwin()) &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // Wrap it since tBX takes a register source operand.
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps())
+ Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee);
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ isDirect = true;
+ bool isStub = Subtarget->isDarwin() &&
+ getTargetMachine().getRelocationModel() != Reloc::Static;
+ isARMFunc = !Subtarget->isThumb() || isStub;
+ // Wrap it since tBX takes a register source operand.
+ if (!Subtarget->hasV5TOps() && Subtarget->isThumb())
+ Callee = DAG.getNode(ARMISD::WrapperCall, MVT::i32, Callee);
+ }
+
+ std::vector<MVT::ValueType> NodeTys;
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
+
+ std::vector<SDOperand> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // FIXME: handle tail calls differently.
+ unsigned CallOpc;
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc))
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
+ } else {
+ CallOpc = (isDirect || Subtarget->hasV5TOps())
+ ? ARMISD::CALL : ARMISD::CALL_NOLINK;
+ }
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ SDOperand CSOps[] = { Chain, DAG.getConstant(NumBytes, MVT::i32), InFlag };
+ Chain = DAG.getNode(ISD::CALLSEQ_END,
+ DAG.getNodeValueTypes(MVT::Other, MVT::Flag),
+ ((RetVT != MVT::Other) ? 2 : 1), CSOps, 3);
+ if (RetVT != MVT::Other)
+ InFlag = Chain.getValue(1);
+
+ std::vector<SDOperand> ResultVals;
+ NodeTys.clear();
+
+ // If the call has results, copy the values out of the ret val registers.
+ switch (RetVT) {
+ default: assert(0 && "Unexpected ret value!");
+ case MVT::Other:
+ break;
+ case MVT::i32:
+ Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ if (Op.Val->getValueType(1) == MVT::i32) {
+ // Returns a i64 value.
+ Chain = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32,
+ Chain.getValue(2)).getValue(1);
+ ResultVals.push_back(Chain.getValue(0));
+ NodeTys.push_back(MVT::i32);
+ }
+ NodeTys.push_back(MVT::i32);
+ break;
+ case MVT::f32:
+ Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1);
+ ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, MVT::f32,
+ Chain.getValue(0)));
+ NodeTys.push_back(MVT::f32);
+ break;
+ case MVT::f64: {
+ SDOperand Lo = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag);
+ SDOperand Hi = DAG.getCopyFromReg(Lo, ARM::R1, MVT::i32, Lo.getValue(2));
+ ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, MVT::f64, Lo, Hi));
+ NodeTys.push_back(MVT::f64);
+ break;
+ }
+ }
+
+ NodeTys.push_back(MVT::Other);
+
+ if (ResultVals.empty())
+ return Chain;
+
+ ResultVals.push_back(Chain);
+ SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0],
+ ResultVals.size());
+ return Res.getValue(Op.ResNo);
+}
+
+static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Copy;
+ SDOperand Chain = Op.getOperand(0);
+ switch(Op.getNumOperands()) {
+ default:
+ assert(0 && "Do not know how to return this many arguments!");
+ abort();
+ case 1: {
+ SDOperand LR = DAG.getRegister(ARM::LR, MVT::i32);
+ return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain);
+ }
+ case 3:
+ Op = Op.getOperand(1);
+ if (Op.getValueType() == MVT::f32) {
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+ } else if (Op.getValueType() == MVT::f64) {
+ // Recursively legalize f64 -> i64.
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Op);
+ return DAG.getNode(ISD::RET, MVT::Other, Chain, Op,
+ DAG.getConstant(0, MVT::i32));
+ }
+ Copy = DAG.getCopyToReg(Chain, ARM::R0, Op, SDOperand());
+ if (DAG.getMachineFunction().liveout_empty())
+ DAG.getMachineFunction().addLiveOut(ARM::R0);
+ break;
+ case 5:
+ Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand());
+ Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1));
+    // If we haven't noted that R0 and R1 are live out, do so now.
+ if (DAG.getMachineFunction().liveout_empty()) {
+ DAG.getMachineFunction().addLiveOut(ARM::R0);
+ DAG.getMachineFunction().addLiveOut(ARM::R1);
+ }
+ break;
+ }
+
+  // We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag.
+ return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above-mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing modes. These wrapped nodes will be selected
+// into MOVri.
+static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ SDOperand Res;
+ if (CP->isMachineConstantPoolEntry())
+ Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment());
+ return DAG.getNode(ARMISD::Wrapper, MVT::i32, Res);
+}
+
+/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol
+/// even in dynamic-no-pic mode.
+static bool GVIsIndirectSymbol(GlobalValue *GV) {
+ return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
+}
+
+SDOperand ARMTargetLowering::LowerGlobalAddress(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT::ValueType PtrVT = getPointerTy();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+ bool IsIndirect = Subtarget->isDarwin() && GVIsIndirectSymbol(GV);
+ SDOperand CPAddr;
+ if (RelocM == Reloc::Static)
+ CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2);
+ else {
+ unsigned PCAdj = (RelocM != Reloc::PIC_)
+ ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
+ IsIndirect, PCAdj);
+ CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+ }
+ CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr);
+
+ SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0);
+ SDOperand Chain = Result.getValue(1);
+
+ if (RelocM == Reloc::PIC_) {
+ SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel);
+ }
+ if (IsIndirect)
+ Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0);
+
+ return Result;
+}
+
+static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
+ unsigned VarArgsFrameIndex) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+ SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
+ SV->getOffset());
+}
+
+static SDOperand LowerFORMAL_ARGUMENT(SDOperand Op, SelectionDAG &DAG,
+ unsigned *vRegs, unsigned ArgNo,
+ unsigned &NumGPRs, unsigned &ArgOffset) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
+ SDOperand Root = Op.getOperand(0);
+ std::vector<SDOperand> ArgValues;
+ SSARegMap *RegMap = MF.getSSARegMap();
+
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ unsigned ObjSize = 0;
+ unsigned ObjGPRs = 0;
+ HowToPassArgument(ObjectVT, NumGPRs, ObjSize, ObjGPRs);
+
+ SDOperand ArgValue;
+ if (ObjGPRs == 1) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ vRegs[NumGPRs] = VReg;
+ ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ if (ObjectVT == MVT::f32)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, ArgValue);
+ } else if (ObjGPRs == 2) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ vRegs[NumGPRs] = VReg;
+ ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+
+ VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs+1], VReg);
+ vRegs[NumGPRs+1] = VReg;
+ SDOperand ArgValue2 = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i64)
+ ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+ else
+ ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+ }
+ NumGPRs += ObjGPRs;
+
+ if (ObjSize) {
+ // If the argument is actually used, emit a load from the right stack
+ // slot.
+ if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
+ if (ObjGPRs == 0)
+ ArgValue = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
+ else {
+ SDOperand ArgValue2 =
+ DAG.getLoad(MVT::i32, Root, FIN, NULL, 0);
+ if (ObjectVT == MVT::i64)
+ ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
+ else
+ ArgValue= DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2);
+ }
+ } else {
+ // Don't emit a dead load.
+ ArgValue = DAG.getNode(ISD::UNDEF, ObjectVT);
+ }
+
+ ArgOffset += ObjSize; // Move on to the next argument.
+ }
+
+ return ArgValue;
+}
+
+SDOperand
+ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
+ std::vector<SDOperand> ArgValues;
+ SDOperand Root = Op.getOperand(0);
+ unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
+ unsigned NumGPRs = 0; // GPRs used for parameter passing.
+ unsigned VRegs[4];
+
+ unsigned NumArgs = Op.Val->getNumValues()-1;
+ for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
+ ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, VRegs, ArgNo,
+ NumGPRs, ArgOffset));
+
+ bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (isVarArg) {
+ static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+ };
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SSARegMap *RegMap = MF.getSSARegMap();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned VARegSaveSize = (4 - NumGPRs) * 4;
+ if (VARegSaveSize) {
+      // If this function is vararg, store any remaining integer argument regs
+      // to their spots on the stack so that they may be loaded by dereferencing
+      // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
+
+ SmallVector<SDOperand, 4> MemOps;
+ for (; NumGPRs < 4; ++NumGPRs) {
+ unsigned VReg = RegMap->createVirtualRegister(&ARM::GPRRegClass);
+ MF.addLiveIn(GPRArgRegs[NumGPRs], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ }
+
+ ArgValues.push_back(Root);
+
+ // Return the new list of results.
+ std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
+ Op.Val->value_end());
+ return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDOperand Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->isExactlyValue(0.0);
+ else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
+ // Maybe this has already been legalized into the constant pool?
+ if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+ SDOperand WrapperOp = Op.getOperand(1).getOperand(0);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->isExactlyValue(0.0);
+ }
+ }
+ return false;
+}
+
+static bool isLegalCmpImmediate(int C, bool isThumb) {
+ return ( isThumb && (C & ~255U) == 0) ||
+ (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+}
+
+/// Returns the appropriate ARM CMP (cmp) and the corresponding condition
+/// code for the given operands.
+static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC,
+ SDOperand &ARMCC, SelectionDAG &DAG, bool isThumb) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.Val)) {
+ int C = (int)RHSC->getValue();
+ if (!isLegalCmpImmediate(C, isThumb)) {
+      // Constant does not fit; try adjusting it by one.
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETULT:
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ if (isLegalCmpImmediate(C-1, isThumb)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETLT: CC = ISD::SETLE; break;
+ case ISD::SETULT: CC = ISD::SETULE; break;
+ case ISD::SETGE: CC = ISD::SETGT; break;
+ case ISD::SETUGE: CC = ISD::SETUGT; break;
+ }
+ RHS = DAG.getConstant(C-1, MVT::i32);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETULE:
+ case ISD::SETGT:
+ case ISD::SETUGT:
+ if (isLegalCmpImmediate(C+1, isThumb)) {
+ switch (CC) {
+ default: break;
+ case ISD::SETLE: CC = ISD::SETLT; break;
+ case ISD::SETULE: CC = ISD::SETULT; break;
+ case ISD::SETGT: CC = ISD::SETGE; break;
+ case ISD::SETUGT: CC = ISD::SETUGE; break;
+ }
+ RHS = DAG.getConstant(C+1, MVT::i32);
+ }
+ break;
+ }
+ }
+ }
+
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ return DAG.getNode(ARMISD::CMP, MVT::Flag, LHS, RHS);
+}
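
The adjustment above exploits the identity x < C iff x <= C - 1 (and its
duals for >=, <=, >). For example, only 0..255 are legal Thumb compare
immediates, so "x < 256" is not directly encodable but "x <= 255" is. A tiny
sketch with hypothetical names, assuming unsigned operands:

static bool cmpLT256(unsigned X) { return X < 256; }   // immediate 256: illegal
static bool cmpLE255(unsigned X) { return X <= 255; }  // same predicate, legal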
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+static SDOperand getVFPCmp(SDOperand LHS, SDOperand RHS, SelectionDAG &DAG) {
+ SDOperand Cmp;
+ if (!isFloatingPointZero(RHS))
+ Cmp = DAG.getNode(ARMISD::CMPFP, MVT::Flag, LHS, RHS);
+ else
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, MVT::Flag, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp);
+}
+
+static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand LHS = Op.getOperand(0);
+ SDOperand RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDOperand TrueVal = Op.getOperand(2);
+ SDOperand FalseVal = Op.getOperand(3);
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDOperand ARMCC;
+ SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb());
+ return DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, ARMCC, Cmp);
+ }
+
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ std::swap(TrueVal, FalseVal);
+
+ SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDOperand Cmp = getVFPCmp(LHS, RHS, DAG);
+ SDOperand Result = DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal,
+ ARMCC, Cmp);
+ if (CondCode2 != ARMCC::AL) {
+ SDOperand ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ // FIXME: Needs another CMP because flag can have but one use.
+ SDOperand Cmp2 = getVFPCmp(LHS, RHS, DAG);
+ Result = DAG.getNode(ARMISD::CMOV, VT, Result, TrueVal, ARMCC2, Cmp2);
+ }
+ return Result;
+}
+
+static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ SDOperand Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDOperand LHS = Op.getOperand(2);
+ SDOperand RHS = Op.getOperand(3);
+ SDOperand Dest = Op.getOperand(4);
+
+ if (LHS.getValueType() == MVT::i32) {
+ SDOperand ARMCC;
+ SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb());
+ return DAG.getNode(ARMISD::BRCOND, MVT::Other, Chain, Dest, ARMCC, Cmp);
+ }
+
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ ARMCC::CondCodes CondCode, CondCode2;
+ if (FPCCToARMCC(CC, CondCode, CondCode2))
+ // Swap the LHS/RHS of the comparison if needed.
+ std::swap(LHS, RHS);
+
+ SDOperand Cmp = getVFPCmp(LHS, RHS, DAG);
+ SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Ops[] = { Chain, Dest, ARMCC, Cmp };
+ SDOperand Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 4);
+ if (CondCode2 != ARMCC::AL) {
+ ARMCC = DAG.getConstant(CondCode2, MVT::i32);
+ SDOperand Ops[] = { Res, Dest, ARMCC, Res.getValue(1) };
+ Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 4);
+ }
+ return Res;
+}
+
+SDOperand ARMTargetLowering::LowerBR_JT(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Chain = Op.getOperand(0);
+ SDOperand Table = Op.getOperand(1);
+ SDOperand Index = Op.getOperand(2);
+
+ MVT::ValueType PTy = getPointerTy();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ SDOperand UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
+ SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
+ Table = DAG.getNode(ARMISD::WrapperJT, MVT::i32, JTI, UId);
+ Index = DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(4, PTy));
+ SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table);
+ bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ Addr = DAG.getLoad(isPIC ? MVT::i32 : PTy, Chain, Addr, NULL, 0);
+ Chain = Addr.getValue(1);
+ if (isPIC)
+ Addr = DAG.getNode(ISD::ADD, PTy, Addr, Table);
+ return DAG.getNode(ARMISD::BR_JT, MVT::Other, Chain, Addr, JTI, UId);
+}
+
+static SDOperand LowerFP_TO_INT(SDOperand Op, SelectionDAG &DAG) {
+ unsigned Opc =
+ Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
+ Op = DAG.getNode(Opc, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op);
+}
+
+static SDOperand LowerINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
+ unsigned Opc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
+
+ Op = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0));
+ return DAG.getNode(Opc, VT, Op);
+}
+
+static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
+ // Implement fcopysign with a fabs and a conditional fneg.
+ SDOperand Tmp0 = Op.getOperand(0);
+ SDOperand Tmp1 = Op.getOperand(1);
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType SrcVT = Tmp1.getValueType();
+ SDOperand AbsVal = DAG.getNode(ISD::FABS, VT, Tmp0);
+ SDOperand Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG);
+ SDOperand ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, Cmp);
+}
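
The lowering above computes copysign(a, b) as t = fabs(a) followed by a
conditional negate when b compares less than zero. A plain-C++ sketch of the
same dataflow (copysignSketch is hypothetical; like the DAG sequence, it
tests an ordered b < 0 rather than b's raw sign bit):

#include <cmath>
static double copysignSketch(double A, double B) {
  double T = std::fabs(A);    // FABS
  return B < 0.0 ? -T : T;    // CNEG under the LT condition
}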
+
+static SDOperand LowerBIT_CONVERT(SDOperand Op, SelectionDAG &DAG) {
+ // Turn f64->i64 into FMRRD.
+ assert(Op.getValueType() == MVT::i64 &&
+ Op.getOperand(0).getValueType() == MVT::f64);
+
+ Op = Op.getOperand(0);
+ SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, DAG.getVTList(MVT::i32, MVT::i32),
+ &Op, 1);
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Cvt, Cvt.getValue(1));
+}
+
+static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+ // FIXME: All this code is target-independent. Create a new target-indep
+ // MULHILO node and move this code to the legalizer.
+ //
+ assert(Op.getValueType() == MVT::i64 && "Only handles i64 expand right now!");
+
+ SDOperand LL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDOperand RL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, MVT::i32));
+
+ const TargetLowering &TL = DAG.getTargetLoweringInfo();
+ unsigned LHSSB = TL.ComputeNumSignBits(Op.getOperand(0));
+ unsigned RHSSB = TL.ComputeNumSignBits(Op.getOperand(1));
+
+ SDOperand Lo, Hi;
+ // Figure out how to lower this multiply.
+ if (LHSSB >= 33 && RHSSB >= 33) {
+ // If the input values are both sign extended, we can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, MVT::i32, LL, RL);
+ } else if (LHSSB == 32 && RHSSB == 32 &&
+ TL.MaskedValueIsZero(Op.getOperand(0), 0xFFFFFFFF00000000ULL) &&
+ TL.MaskedValueIsZero(Op.getOperand(1), 0xFFFFFFFF00000000ULL)) {
+ // If the inputs are zero extended, use mulhu.
+ Lo = DAG.getNode(ISD::MUL, MVT::i32, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, MVT::i32, LL, RL);
+ } else {
+ SDOperand LH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+ SDOperand RH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, MVT::i32));
+
+ // Lo,Hi = umul LHS, RHS.
+ SDOperand Ops[] = { LL, RL };
+ SDOperand UMul64 = DAG.getNode(ARMISD::MULHILOU,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2);
+ Lo = UMul64;
+ Hi = UMul64.getValue(1);
+ RH = DAG.getNode(ISD::MUL, MVT::i32, LL, RH);
+ LH = DAG.getNode(ISD::MUL, MVT::i32, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, MVT::i32, Hi, LH);
+ }
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+}
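
The general case above is the schoolbook decomposition modulo 2^64:
(LH*2^32 + LL) * (RH*2^32 + RL) = LL*RL + 2^32*(LL*RH + LH*RL), with the
LH*RH term overflowing away entirely. A self-contained sketch (mul64Sketch
is a hypothetical name):

#include <cstdint>
static uint64_t mul64Sketch(uint64_t A, uint64_t B) {
  uint32_t LL = uint32_t(A), LH = uint32_t(A >> 32);
  uint32_t RL = uint32_t(B), RH = uint32_t(B >> 32);
  uint64_t P = uint64_t(LL) * RL;   // UMULL: full 64-bit low product
  uint32_t Hi = uint32_t(P >> 32);
  Hi += LL * RH + LH * RL;          // the two MUL + ADD fixups above
  return (uint64_t(Hi) << 32) | uint32_t(P);
}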
+
+static SDOperand LowerMULHU(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ return DAG.getNode(ARMISD::MULHILOU,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1);
+}
+
+static SDOperand LowerMULHS(SDOperand Op, SelectionDAG &DAG) {
+ SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ return DAG.getNode(ARMISD::MULHILOS,
+ DAG.getVTList(MVT::i32, MVT::i32), Ops, 2).getValue(1);
+}
+
+static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ assert(Op.getValueType() == MVT::i64 &&
+ (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+ "Unknown shift to lower!");
+
+ // We only lower SRA, SRL of 1 here, all others use generic lowering.
+ if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
+ cast<ConstantSDNode>(Op.getOperand(1))->getValue() != 1)
+ return SDOperand();
+
+ // If we are in thumb mode, we don't have RRX.
+ if (ST->isThumb()) return SDOperand();
+
+ // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+ SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, MVT::i32));
+
+ // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // captures the result into a carry flag.
+ unsigned Opc = Op.getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ Hi = DAG.getNode(Opc, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
+
+ // The low part is an ARMISD::RRX operand, which shifts the carry in.
+ Lo = DAG.getNode(ARMISD::RRX, MVT::i32, Lo, Hi.getValue(1));
+
+ // Merge the pieces into a single i64 value.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
+}
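
On the host, the same shift-by-one looks like this (illustrative C++; lshr64By1 is a hypothetical name): the bit shifted out of the high word is the carry that RRX rotates into the top of the low word.

#include <cstdint>

uint64_t lshr64By1(uint64_t V) {
  uint32_t Lo = uint32_t(V), Hi = uint32_t(V >> 32);
  uint32_t Carry = Hi & 1;            // SRL_FLAG: carry-out of the high shift
  Hi >>= 1;
  Lo = (Lo >> 1) | (Carry << 31);     // RRX: shift the carry into bit 31
  return (uint64_t(Hi) << 32) | Lo;
}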
+
+SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Don't know how to custom lower this!"); abort();
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::CALL: return LowerCALL(Op, DAG);
+ case ISD::RET: return LowerRET(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+ case ISD::MULHU: return LowerMULHU(Op, DAG);
+ case ISD::MULHS: return LowerMULHS(Op, DAG);
+ case ISD::SRL:
+ case ISD::SRA: return LowerSRx(Op, DAG, Subtarget);
+ case ISD::FORMAL_ARGUMENTS:
+ return LowerFORMAL_ARGUMENTS(Op, DAG);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *BB) {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ switch (MI->getOpcode()) {
+ default: assert(false && "Unexpected instr type to insert");
+ case ARM::tMOVCCr: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ ilist<MachineBasicBlock>::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
+ BuildMI(BB, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm());
+ MachineFunction *F = BB->getParent();
+ F->getBasicBlockList().insert(It, copy0MBB);
+ F->getBasicBlockList().insert(It, sinkMBB);
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i)
+ sinkMBB->addSuccessor(*i);
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(BB, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ delete MI; // The pseudo instruction is gone now.
+ return BB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// ARM Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressImmediate - Return true if the integer value or
+/// GlobalValue can be used as the offset of the target addressing mode.
+bool ARMTargetLowering::isLegalAddressImmediate(int64_t V) const {
+ // ARM allows a 12-bit immediate field.
+  return V == (V & ((1LL << 12) - 1));
+}
+
+bool ARMTargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
+ return false;
+}
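
A quick sanity check of the 12-bit predicate (a hypothetical test, not part of the patch). The explicit parentheses matter: == binds tighter than & in C++, which is why the return statement above needs them.

#include <cassert>
#include <cstdint>

static bool fitsIn12Bits(int64_t V) {
  return V == (V & ((1LL << 12) - 1));
}

int main() {
  assert(fitsIn12Bits(0) && fitsIn12Bits(4095));    // 12-bit range is [0,4095]
  assert(!fitsIn12Bits(4096) && !fitsIn12Bits(-1)); // out of range / negative
}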
+
+static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT,
+ bool isSEXTLoad, SDOperand &Base,
+ SDOperand &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+ return false;
+
+ if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+ // AddressingMode 3
+ Base = Ptr->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC < 0 && RHSC > -256) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ return true;
+ }
+ }
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Offset = Ptr->getOperand(1);
+ return true;
+ } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+ // AddressingMode 2
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+ int RHSC = (int)RHS->getValue();
+ if (RHSC < 0 && RHSC > -0x1000) {
+ isInc = false;
+ Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+ Base = Ptr->getOperand(0);
+ return true;
+ }
+ }
+
+ if (Ptr->getOpcode() == ISD::ADD) {
+ isInc = true;
+ ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ Base = Ptr->getOperand(1);
+ Offset = Ptr->getOperand(0);
+ } else {
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ }
+ return true;
+ }
+
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ return true;
+ }
+
+ // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ return false;
+}
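
In source terms, the two indexed forms this function recognizes fold the pointer update into the memory access (illustrative C++ stand-ins, not part of the patch):

// Pre-indexed:  ldr r0, [r1, #4]!  -- update the base, then load through it.
int loadPre(int *&P)  { P += 1; return *P; }

// Post-indexed: ldr r0, [r1], #4   -- load, then update the base.
int loadPost(int *&P) { int V = *P; P += 1; return V; }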
+
+/// getPreIndexedAddressParts - Returns true if the node's address can be
+/// legally represented as a pre-indexed load / store address, and sets the
+/// base pointer, offset, and addressing mode by reference.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT::ValueType VT;
+ SDOperand Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getLoadedVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getStoredVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Ptr.Val, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+ }
+ return false;
+}
+
+/// getPostIndexedAddressParts - Returns true if this node can be combined
+/// with a load / store to form a post-indexed load / store, and sets the
+/// base pointer, offset, and addressing mode by reference.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) {
+ if (Subtarget->isThumb())
+ return false;
+
+ MVT::ValueType VT;
+ SDOperand Ptr;
+ bool isSEXTLoad = false;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getLoadedVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getStoredVT();
+ } else
+ return false;
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ if (isLegal) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+ return false;
+}
+
+void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ unsigned Depth) const {
+ KnownZero = 0;
+ KnownOne = 0;
+ switch (Op.getOpcode()) {
+ default: break;
+ case ARMISD::CMOV: {
+ // Bits are known zero/one if known on the LHS and RHS.
+ ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ if (KnownZero == 0 && KnownOne == 0) return;
+
+ uint64_t KnownZeroRHS, KnownOneRHS;
+ ComputeMaskedBits(Op.getOperand(1), Mask,
+ KnownZeroRHS, KnownOneRHS, Depth+1);
+ KnownZero &= KnownZeroRHS;
+ KnownOne &= KnownOneRHS;
+ return;
+ }
+ }
+}
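
A worked example of the CMOV rule (hypothetical values, not part of the patch): with the LHS known to be 0b0100 and the RHS known to be 0b0110, only the bits on which both sides agree stay known; bit 1 disagrees and becomes unknown.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t ZeroL = ~0x4ull, OneL = 0x4;  // LHS == 0b0100, fully known
  uint64_t ZeroR = ~0x6ull, OneR = 0x6;  // RHS == 0b0110, fully known
  uint64_t KnownZero = ZeroL & ZeroR;
  uint64_t KnownOne  = OneL & OneR;
  assert(KnownOne == 0x4);                         // only bit 2 is known one
  assert(!(KnownZero & 0x2) && !(KnownOne & 0x2)); // bit 1 is unknown
}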
+
+//===----------------------------------------------------------------------===//
+// ARM Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(char ConstraintLetter) const {
+ switch (ConstraintLetter) {
+ case 'l':
+ return C_RegisterClass;
+ default: return TargetLowering::getConstraintType(ConstraintLetter);
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() == 1) {
+    // GCC ARM Constraint Letters
+ switch (Constraint[0]) {
+ case 'l':
+      // FIXME: in Thumb mode, 'l' should select only the low registers.
+ // FALL THROUGH.
+ case 'r':
+ return std::make_pair(0U, ARM::GPRRegisterClass);
+ break;
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) { // GCC ARM Constraint Letters
+ default: break;
+ case 'l':
+ case 'r':
+ return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+ ARM::R12, ARM::LR, 0);
+ }
+
+ return std::vector<unsigned>();
+}
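
How these constraints surface to users, as GCC-style inline asm (illustrative; per the FIXME above, 'l' is not yet restricted to the Thumb low registers):

// "r" asks the allocator for any register in the GPR class returned above.
int addOne(int X) {
  int Y;
  asm("add %0, %1, #1" : "=r"(Y) : "r"(X));
  return Y;
}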
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 0000000000..5f7ed65a3e
--- /dev/null
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,134 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include <vector>
+
+namespace llvm {
+ class ARMConstantPoolValue;
+ class ARMSubtarget;
+
+ namespace ARMISD {
+ // ARM Specific DAG Nodes
+ enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END+ARM::INSTRUCTION_LIST_END,
+
+ Wrapper, // Wrapper - A wrapper node for TargetConstantPool,
+ // TargetExternalSymbol, and TargetGlobalAddress.
+ WrapperCall, // WrapperCall - Same as wrapper, but mark the wrapped
+ // node as call operand.
+ WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable
+
+ CALL, // Function call.
+ CALL_NOLINK, // Function call with branch not branch-and-link.
+ tCALL, // Thumb function call.
+ BRCOND, // Conditional branch.
+ BR_JT, // Jumptable branch.
+ RET_FLAG, // Return with a flag operand.
+
+ PIC_ADD, // Add with a PC operand and a PIC label.
+
+ CMP, // ARM compare instructions.
+ CMPFP, // ARM VFP compare instruction, sets FPSCR.
+ CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
+ FMSTAT, // ARM fmstat instruction.
+ CMOV, // ARM conditional move instructions.
+ CNEG, // ARM conditional negate instructions.
+
+ FTOSI, // FP to sint within a FP register.
+ FTOUI, // FP to uint within a FP register.
+ SITOF, // sint to FP within a FP register.
+ UITOF, // uint to FP within a FP register.
+
+ MULHILOU, // Lo,Hi = umul LHS, RHS.
+ MULHILOS, // Lo,Hi = smul LHS, RHS.
+
+ SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+ SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+ FMRRD, // double to two gprs.
+ FMDRR // Two gprs to double.
+ };
+ }
+
+ //===----------------------------------------------------------------------===//
+  // ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+ class ARMTargetLowering : public TargetLowering {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ public:
+ ARMTargetLowering(TargetMachine &TM);
+
+ virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG);
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual MachineBasicBlock *InsertAtEndOfBasicBlock(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+
+ /// isLegalAddressImmediate - Return true if the integer value or
+ /// GlobalValue can be used as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
+
+    /// getPreIndexedAddressParts - Returns true if the node's address can be
+    /// legally represented as a pre-indexed load / store address, and sets
+    /// the base pointer, offset, and addressing mode by reference.
+ virtual bool getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
+ SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG);
+
+    /// getPostIndexedAddressParts - Returns true if this node can be combined
+    /// with a load / store to form a post-indexed load / store, and sets the
+    /// base pointer, offset, and addressing mode by reference.
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDOperand &Base, SDOperand &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG);
+
+ virtual void computeMaskedBitsForTargetNode(const SDOperand Op,
+ uint64_t Mask,
+ uint64_t &KnownZero,
+ uint64_t &KnownOne,
+ unsigned Depth) const;
+ ConstraintType getConstraintType(char ConstraintLetter) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ MVT::ValueType VT) const;
+ private:
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// ARMPCLabelIndex - Keep track the number of ARM PC labels created.
+ ///
+ unsigned ARMPCLabelIndex;
+
+ SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
+ SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
+ };
+}
+
+#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index f99615b572..b5425fec8a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -14,46 +14,409 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
-ARMInstrInfo::ARMInstrInfo()
+static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: TargetInstrInfo(ARMInsts, sizeof(ARMInsts)/sizeof(ARMInsts[0])),
- RI(*this) {
+ RI(*this, STI) {
+}
+
+unsigned ARMInstrInfo::getDWARF_LABELOpcode() const {
+ return ARM::DWARF_LABEL;
}
const TargetRegisterClass *ARMInstrInfo::getPointerRegClass() const {
- return &ARM::IntRegsRegClass;
+ return &ARM::GPRRegClass;
}
/// Return true if the instruction is a register to register move and
/// leave the source and dest operands in the passed parameters.
///
bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg) const {
+ unsigned &SrcReg, unsigned &DstReg) const {
MachineOpCode oc = MI.getOpcode();
switch (oc) {
- case ARM::MOV: {
- assert(MI.getNumOperands() == 4 &&
- MI.getOperand(0).isRegister() &&
+ default:
+ return false;
+ case ARM::FCPYS:
+ case ARM::FCPYD:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ case ARM::MOVrr:
+ case ARM::tMOVrr:
+ assert(MI.getNumOperands() == 2 && MI.getOperand(0).isRegister() &&
+ MI.getOperand(1).isRegister() &&
"Invalid ARM MOV instruction");
- const MachineOperand &Arg = MI.getOperand(1);
- const MachineOperand &Shift = MI.getOperand(2);
- if (Arg.isRegister() && Shift.isImmediate() && Shift.getImmedValue() == 0) {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ return true;
+ }
+}
+
+unsigned ARMInstrInfo::isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const{
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDR:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImmediate() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FLDD:
+ case ARM::FLDS:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::tLDRspi:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned ARMInstrInfo::isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::STR:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImmediate() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::FSTD:
+ case ARM::FSTS:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
}
+ break;
+ case ARM::tSTRspi:
+ if (MI->getOperand(1).isFrameIndex() &&
+ MI->getOperand(2).isImmediate() &&
+ MI->getOperand(2).getImmedValue() == 0) {
+ FrameIndex = MI->getOperand(1).getFrameIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
}
+ return 0;
+}
+
+static unsigned getUnindexedOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::LDR_PRE:
+ case ARM::LDR_POST:
+ return ARM::LDR;
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ return ARM::LDRH;
+ case ARM::LDRB_PRE:
+ case ARM::LDRB_POST:
+ return ARM::LDRB;
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ return ARM::LDRSH;
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ return ARM::LDRSB;
+ case ARM::STR_PRE:
+ case ARM::STR_POST:
+ return ARM::STR;
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ return ARM::STRH;
+ case ARM::STRB_PRE:
+ case ARM::STRB_POST:
+ return ARM::STRB;
}
- return false;
+ return 0;
}
-void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const std::vector<MachineOperand> &Cond)const{
- // Can only insert uncond branches so far.
- assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, get(ARM::b)).addMBB(TBB);
+MachineInstr *
+ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const {
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ unsigned TSFlags = MI->getInstrDescriptor()->TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+  // Try splitting an indexed load / store into an un-indexed one plus an
+  // add/sub operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ unsigned NumOps = MI->getNumOperands();
+ bool isLoad = (MI->getInstrDescriptor()->Flags & M_LOAD_FLAG) != 0;
+ const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-2);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ int SOImmVal = ARM_AM::getSOImmVal(Amt);
+ if (SOImmVal == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(SOImmVal);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc);
+ } else
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg);
+ break;
+ }
+ case ARMII::AddrMode3 : {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+      // The immediate is 8 bits; it's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt);
+ else
+ UpdateMI = BuildMI(get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addReg(0).addImm(0);
+ else
+ MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0);
+ else
+ MemMI = BuildMI(get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegister() && MO.getReg() &&
+ MRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV.addVirtualRegisterDead(Reg, NewMI);
+ // Update the defining instruction.
+ if (VI.DefInst == MI)
+ VI.DefInst = NewMI;
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j];
+ MachineOperand *NMO = NewMI->findRegisterUseOperand(Reg);
+ if (!NMO)
+ continue;
+ LV.addVirtualRegisterKilled(Reg, NewMI);
+ if (VI.removeKill(MI))
+ VI.Kills.push_back(NewMI);
+ break;
+ }
+ }
+ }
+ }
+
+ MFI->insert(MBBI, NewMIs[1]);
+ MFI->insert(MBBI, NewMIs[0]);
+ return NewMIs[0];
+}
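
At the assembly level, the split performed above turns one indexed access into an un-indexed access plus an explicit base update; a host-level C++ stand-in for the pre-indexed case (hypothetical name, not LLVM API):

#include <cstdint>

int32_t ldrPreSplit(int32_t *&Base, int OffsetWords) {
  Base += OffsetWords;  // UpdateMI: the add/sub that writes the base register
  return *Base;         // MemMI: un-indexed ldr from the updated base
}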
+
+// Branch analysis.
+bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode()))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ unsigned LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isTerminatorInstr((--I)->getOpcode())) {
+ if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+ if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
+
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isTerminatorInstr((--I)->getOpcode()))
+ return true;
+
+  // If the block ends with ARM::B/ARM::tB and an ARM::Bcc/ARM::tBcc, handle it.
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+ if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+ (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+ TBB = SecondLastInst->getOperand(0).getMachineBasicBlock();
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMachineBasicBlock();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+void ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
+ return;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return;
+ --I;
+ if (I->getOpcode() != BccOpc)
+ return;
+
+ // Remove the branch.
+ I->eraseFromParent();
+}
+
+void ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const std::vector<MachineOperand> &Cond) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B;
+ int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "ARM branch conditions have at most one component!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+ return;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, get(BccOpc)).addMBB(TBB).addImm(Cond[0].getImm());
+ BuildMI(&MBB, get(BOpc)).addMBB(FBB);
+}
+
+bool ARMInstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const {
+ if (MBB.empty()) return false;
+
+ switch (MBB.back().getOpcode()) {
+ case ARM::B:
+ case ARM::tB: // Uncond branch.
+ case ARM::BR_JTr: // Jumptable branch.
+ case ARM::BR_JTm: // Jumptable branch through mem.
+ case ARM::BR_JTadd: // Jumptable branch add to pc.
+ return true;
+ default: return false;
+ }
+}
+
+bool ARMInstrInfo::
+ReverseBranchCondition(std::vector<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 0621c70d14..0208121f14 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information --------------*- C++ -*-===//
+//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,11 +19,56 @@
#include "ARMRegisterInfo.h"
namespace llvm {
+ class ARMSubtarget;
+
+/// ARMII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace ARMII {
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction Flags.
+
+ //===------------------------------------------------------------------===//
+    // This four-bit field describes the addressing mode used.  Zero is unused
+    // so that we can tell if we forgot to set a value.
+
+ AddrModeMask = 0xf,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrModeT1 = 6,
+ AddrModeT2 = 7,
+ AddrModeT4 = 8,
+ AddrModeTs = 9, // i8 * 4 for pc and sp relative data
+
+ // Size* - Flags to keep track of the size of an instruction.
+ SizeShift = 4,
+ SizeMask = 7 << SizeShift,
+ SizeSpecial = 1, // 0 byte pseudo or special case.
+ Size8Bytes = 2,
+ Size4Bytes = 3,
+ Size2Bytes = 4,
+
+    // IndexMode - Unindexed, pre-indexed, or post-indexed.  Only valid for
+    // load and store ops.
+ IndexModeShift = 7,
+ IndexModeMask = 3 << IndexModeShift,
+ IndexModePre = 1,
+ IndexModePost = 2,
+
+ // Opcode
+ OpcodeShift = 9,
+ OpcodeMask = 0xf << OpcodeShift
+ };
+}
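
A stand-alone check that the bit-fields above decode as intended (a hypothetical test mirroring the TSFlags use in convertToThreeAddress):

#include <cassert>

int main() {
  unsigned TSFlags = 2u          // AddrMode2
                   | (3u << 4)   // Size4Bytes at SizeShift
                   | (1u << 7);  // IndexModePre at IndexModeShift
  assert((TSFlags & 0xf) == 2);        // AddrModeMask
  assert(((TSFlags >> 4) & 7) == 3);   // SizeMask >> SizeShift
  assert(((TSFlags >> 7) & 3) == 1);   // IndexModeMask >> IndexModeShift
}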
class ARMInstrInfo : public TargetInstrInfo {
const ARMRegisterInfo RI;
public:
- ARMInstrInfo();
+ ARMInstrInfo(const ARMSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -35,15 +80,33 @@ public:
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass() const;
+ /// getDWARF_LABELOpcode - Return the opcode of the target's DWARF_LABEL
+ /// instruction if it has one. This is used by codegen passes that update
+ /// DWARF line number info as they modify the code.
+ virtual unsigned getDWARF_LABELOpcode() const;
+
/// Return true if the instruction is a register to register move and
/// leave the source and dest operands in the passed parameters.
///
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg) const;
+ virtual unsigned isLoadFromStackSlot(MachineInstr *MI, int &FrameIndex) const;
+ virtual unsigned isStoreToStackSlot(MachineInstr *MI, int &FrameIndex) const;
+
+ virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables &LV) const;
+ // Branch analysis.
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ std::vector<MachineOperand> &Cond) const;
+ virtual void RemoveBranch(MachineBasicBlock &MBB) const;
virtual void InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const std::vector<MachineOperand> &Cond) const;
+ virtual bool BlockHasNoFallThrough(MachineBasicBlock &MBB) const;
+ virtual bool ReverseBranchCondition(std::vector<MachineOperand> &Cond) const;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index beed8abef9..bde81bcfe8 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.td - Target Description for ARM Target ----------------===//
+//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,351 +12,1184 @@
//
//===----------------------------------------------------------------------===//
-// Address operands
-def op_addr_mode1 : Operand<iPTR> {
- let PrintMethod = "printAddrMode1";
- let MIOperandInfo = (ops ptr_rc, ptr_rc, i32imm);
+//===----------------------------------------------------------------------===//
+// ARM specific DAG Nodes.
+//
+
+// Type profiles.
+def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
+
+def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+
+def SDT_ARMCMov : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisVT<3, i32>]>;
+
+def SDT_ARMBrcond : SDTypeProfile<0, 2,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+
+def SDT_ARMBrJT : SDTypeProfile<0, 3,
+ [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
+def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+
+def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
+
+// Node definitions.
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperCall : SDNode<"ARMISD::WrapperCall", SDTIntUnaryOp>;
+def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
+
+def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeq,
+ [SDNPHasChain, SDNPOutFlag]>;
+
+def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
+ [SDNPInFlag]>;
+def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
+ [SDNPInFlag]>;
+
+def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
+ [SDNPHasChain]>;
+
+def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
+ [SDNPOutFlag]>;
+
+def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction Predicate Definitions.
+//
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+ string Constraints = C;
}
-def op_addr_mode2 : Operand<iPTR> {
- let PrintMethod = "printAddrMode2";
- let MIOperandInfo = (ops ptr_rc, i32imm);
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_XFORM - Return a so_imm value packed into the format described for
+// so_imm def below.
+def so_imm_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getValue()),
+ MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getValue()),
+ MVT::i32);
+}]>;
+
+// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
+def rot_imm : PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getValue();
+ return v == 8 || v == 16 || v == 24;
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getValue() >= 1 && (int32_t)N->getValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+ return (int32_t)N->getValue() >= 16 && (int32_t)N->getValue() < 32;
+}]>;
+
+def so_imm_neg :
+ PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getValue()) != -1; }],
+ so_imm_neg_XFORM>;
+
+def so_imm_not :
+ PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(int)N->getValue()) != -1; }],
+ so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+ return TLI.ComputeNumSignBits(SDOperand(N,0)) >= 17;
+}]>;
+
+
+// Break so_imm values up into two pieces.  This handles immediates with up to
+// 16 bits set in them.  This uses so_imm2part to match and so_imm2part_[12]
+// to get the first/second pieces.
+def so_imm2part : PatLeaf<(imm), [{
+ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getValue());
+}]>;
+
+def so_imm2part_1 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+def so_imm2part_2 : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32);
+}]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+def brtarget : Operand<OtherVT>;
+
+// Operand for printing out a condition code.
+def CCOp : Operand<i32> {
+ let PrintMethod = "printCCOperand";
+}
+
+// A list of registers separated by commas.  Used by load/store multiple.
+def reglist : Operand<i32> {
+ let PrintMethod = "printRegisterList";
+}
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ let PrintMethod = "printCPInstOperand";
+}
+
+def jtblock_operand : Operand<i32> {
+ let PrintMethod = "printJTBlockOperand";
+}
+
+// Local PC labels.
+def pclabel : Operand<i32> {
+ let PrintMethod = "printPCLabel";
+}
+
+// shifter_operand operands: so_reg and so_imm.
+def so_reg : Operand<i32>, // reg reg imm
+ ComplexPattern<i32, 3, "SelectShifterOperandReg",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSORegOperand";
+ let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+
+// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
+// 8-bit immediate rotated right by an even number of bits.  so_imm values are
+// represented in the imm field in the same 12-bit form used to encode them in
+// so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit rotate amount is the next 4 bits [bits 8-11].
+def so_imm : Operand<i32>,
+ PatLeaf<(imm),
+ [{ return ARM_AM::getSOImmVal(N->getValue()) != -1; }],
+ so_imm_XFORM> {
+ let PrintMethod = "printSOImmOperand";
}
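
A C++ sketch of decoding the packed form described above (assuming the hardware convention in which the 4-bit field holds half the rotate-right amount; decodeSOImm is a hypothetical name):

#include <cstdint>

uint32_t decodeSOImm(unsigned Imm12) {
  uint32_t V   = Imm12 & 0xff;             // 8-bit immediate, bits [0,7]
  unsigned Rot = ((Imm12 >> 8) & 0xf) * 2; // rotate-right amount, bits [8,11]
  return Rot == 0 ? V : ((V >> Rot) | (V << (32 - Rot)));
}
// e.g. decodeSOImm(0x4FF) == 0xFF000000 (0xFF rotated right by 8).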
-def op_addr_mode5 : Operand<iPTR> {
- let PrintMethod = "printAddrMode5";
- let MIOperandInfo = (ops ptr_rc, i32imm);
+
+// Define ARM specific addressing modes.
+
+// addrmode2 := reg +/- reg shop imm
+// addrmode2 := reg +/- imm12
+//
+def addrmode2 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
-// Define ARM specific addressing mode.
-//Addressing Mode 1: data processing operands
-def addr_mode1 : ComplexPattern<iPTR, 3, "SelectAddrMode1", [imm, sra, shl, srl],
- []>;
+def am2offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ let PrintMethod = "printAddrMode2OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
-//Addressing Mode 2: Load and Store Word or Unsigned Byte
-def addr_mode2 : ComplexPattern<iPTR, 2, "SelectAddrMode2", [], []>;
+// addrmode3 := reg +/- reg
+// addrmode3 := reg +/- imm8
+//
+def addrmode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+ let PrintMethod = "printAddrMode3Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
-//Addressing Mode 5: VFP load/store
-def addr_mode5 : ComplexPattern<iPTR, 2, "SelectAddrMode5", [], []>;
+def am3offset : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ let PrintMethod = "printAddrMode3OffsetOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode4 := reg, <mode|W>
+//
+def addrmode4 : Operand<i32>,
+ ComplexPattern<i32, 2, "", []> {
+ let PrintMethod = "printAddrMode4Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmode5 := reg +/- imm8*4
+//
+def addrmode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
+ let PrintMethod = "printAddrMode5Operand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+// addrmodepc := pc + reg
+//
+def addrmodepc : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModePC", []> {
+ let PrintMethod = "printAddrModePCOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
//===----------------------------------------------------------------------===//
-// Instruction Class Templates
+// ARM Instruction flags. These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+ bits<4> Value = val;
+}
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrModeT1 : AddrMode<6>;
+def AddrModeT2 : AddrMode<7>;
+def AddrModeT4 : AddrMode<8>;
+def AddrModeTs : AddrMode<9>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+ bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>; // Unset.
+def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
+def Size8Bytes : SizeFlagVal<2>;
+def Size4Bytes : SizeFlagVal<3>;
+def Size2Bytes : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+ bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
//===----------------------------------------------------------------------===//
-class InstARM<dag ops, string asmstr, list<dag> pattern> : Instruction {
+// ARM Instruction templates.
+//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+
+class InstARM<bits<4> opcod, AddrMode am, SizeFlagVal sz, IndexMode im,
+ dag ops, string asmstr, string cstr>
+ : Instruction {
let Namespace = "ARM";
+ bits<4> Opcode = opcod;
+ AddrMode AM = am;
+ bits<4> AddrModeBits = AM.Value;
+
+ SizeFlagVal SZ = sz;
+ bits<3> SizeFlag = SZ.Value;
+
+ IndexMode IM = im;
+ bits<2> IndexModeBits = IM.Value;
+
dag OperandList = ops;
let AsmString = asmstr;
+ let Constraints = cstr;
+}
+
+class PseudoInst<dag ops, string asm, list<dag> pattern>
+ : InstARM<0, AddrModeNone, SizeSpecial, IndexModeNone, ops, asm, ""> {
+ let Pattern = pattern;
+}
+
+class I<dag ops, AddrMode am, SizeFlagVal sz, IndexMode im,
+ string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, im, ops, asm, cstr> {
let Pattern = pattern;
+ list<Predicate> Predicates = [IsARM];
}
-class IntBinOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops IntRegs:$dst, IntRegs:$a, IntRegs:$b),
- !strconcat(OpcStr, " $dst, $a, $b"),
- [(set IntRegs:$dst, (OpNode IntRegs:$a, IntRegs:$b))]>;
+class AI<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrModeNone, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI1<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode1, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI2<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI3<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AI4<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode4, Size4Bytes, IndexModeNone, asm, "", pattern>;
+class AIx2<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrModeNone, Size8Bytes, IndexModeNone, asm, "", pattern>;
+
+// Pre-indexed ops
+class AI2pr<dag ops, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModePre, asm, cstr, pattern>;
+class AI3pr<dag ops, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModePre, asm, cstr, pattern>;
+
+// Post-indexed ops
+class AI2po<dag ops, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode2, Size4Bytes, IndexModePost, asm, cstr, pattern>;
+class AI3po<dag ops, string asm, string cstr, list<dag> pattern>
+ : I<ops, AddrMode3, Size4Bytes, IndexModePost, asm, cstr, pattern>;
+
+// BR_JT instructions
+class JTI<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrModeNone, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI1<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode1, SizeSpecial, IndexModeNone, asm, "", pattern>;
+class JTI2<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode2, SizeSpecial, IndexModeNone, asm, "", pattern>;
+
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+
+
+/// AI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
+/// binop that produces a value.
+multiclass AI1_bin_irs<string opc, PatFrag opnode> {
+ def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rr : AI1<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+ def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
-class FPBinOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops FPRegs:$dst, FPRegs:$a, FPRegs:$b),
- !strconcat(OpcStr, " $dst, $a, $b"),
- [(set FPRegs:$dst, (OpNode FPRegs:$a, FPRegs:$b))]>;
+/// AI1_bin0_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns.
+/// Similar to AI1_bin_irs except the instruction does not produce a result.
+multiclass AI1_bin0_irs<string opc, PatFrag opnode> {
+ def ri : AI1<(ops GPR:$a, so_imm:$b),
+ !strconcat(opc, " $a, $b"),
+ [(opnode GPR:$a, so_imm:$b)]>;
+ def rr : AI1<(ops GPR:$a, GPR:$b),
+ !strconcat(opc, " $a, $b"),
+ [(opnode GPR:$a, GPR:$b)]>;
+ def rs : AI1<(ops GPR:$a, so_reg:$b),
+ !strconcat(opc, " $a, $b"),
+ [(opnode GPR:$a, so_reg:$b)]>;
+}
-class DFPBinOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops DFPRegs:$dst, DFPRegs:$a, DFPRegs:$b),
- !strconcat(OpcStr, " $dst, $a, $b"),
- [(set DFPRegs:$dst, (OpNode DFPRegs:$a, DFPRegs:$b))]>;
+/// AI1_bin_is - Defines a set of (op r, {so_imm|so_reg}) patterns for a binop.
+multiclass AI1_bin_is<string opc, PatFrag opnode> {
+ def ri : AI1<(ops GPR:$dst, GPR:$a, so_imm:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
+ def rs : AI1<(ops GPR:$dst, GPR:$a, so_reg:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+}
-class FPUnaryOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops FPRegs:$dst, FPRegs:$src),
- !strconcat(OpcStr, " $dst, $src"),
- [(set FPRegs:$dst, (OpNode FPRegs:$src))]>;
+/// AI1_unary_irs - Defines a set of (op {so_imm|r|so_reg}) patterns for unary
+/// ops.
+multiclass AI1_unary_irs<string opc, PatFrag opnode> {
+ def i : AI1<(ops GPR:$dst, so_imm:$a),
+ !strconcat(opc, " $dst, $a"),
+ [(set GPR:$dst, (opnode so_imm:$a))]>;
+ def r : AI1<(ops GPR:$dst, GPR:$a),
+ !strconcat(opc, " $dst, $a"),
+ [(set GPR:$dst, (opnode GPR:$a))]>;
+ def s : AI1<(ops GPR:$dst, so_reg:$a),
+ !strconcat(opc, " $dst, $a"),
+ [(set GPR:$dst, (opnode so_reg:$a))]>;
+}
-class DFPUnaryOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops DFPRegs:$dst, DFPRegs:$src),
- !strconcat(OpcStr, " $dst, $src"),
- [(set DFPRegs:$dst, (OpNode DFPRegs:$src))]>;
+/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_unary_rrot<string opc, PatFrag opnode> {
+ def r : AI<(ops GPR:$dst, GPR:$Src),
+ !strconcat(opc, " $dst, $Src"),
+ [(set GPR:$dst, (opnode GPR:$Src))]>, Requires<[IsARM, HasV6]>;
+ def r_rot : AI<(ops GPR:$dst, GPR:$Src, i32imm:$rot),
+ !strconcat(opc, " $dst, $Src, ror $rot"),
+ [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
+
+/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass AI_bin_rrot<string opc, PatFrag opnode> {
+ def rr : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS),
+ !strconcat(opc, " $dst, $LHS, $RHS"),
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[IsARM, HasV6]>;
+ def rr_rot : AI<(ops GPR:$dst, GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ !strconcat(opc, " $dst, $LHS, $RHS, ror $rot"),
+ [(set GPR:$dst, (opnode GPR:$LHS,
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]>;
+}
-class Addr1BinOp<string OpcStr, SDNode OpNode> :
- InstARM<(ops IntRegs:$dst, IntRegs:$a, op_addr_mode1:$b),
- !strconcat(OpcStr, " $dst, $a, $b"),
- [(set IntRegs:$dst, (OpNode IntRegs:$a, addr_mode1:$b))]>;
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-def brtarget : Operand<OtherVT>;
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+def IMPLICIT_DEF_GPR :
+PseudoInst<(ops GPR:$rD),
+ "@ IMPLICIT_DEF_GPR $rD",
+ [(set GPR:$rD, (undef))]>;
+
+
+/// CONSTPOOL_ENTRY - This instruction represents a constant pool entry that
+/// is placed ("floated") into the function body.  The first operand is the
+/// ID# for this instruction, the second is the index into the
+/// MachineConstantPool, and the third is the size in bytes of the entry.
+def CONSTPOOL_ENTRY :
+PseudoInst<(ops cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size),
+ "${instid:label} ${cpidx:cpentry}", []>;
+
+def ADJCALLSTACKUP :
+PseudoInst<(ops i32imm:$amt),
+ "@ ADJCALLSTACKUP $amt",
+ [(ARMcallseq_end imm:$amt)]>, Imp<[SP],[SP]>;
+
+def ADJCALLSTACKDOWN :
+PseudoInst<(ops i32imm:$amt),
+ "@ ADJCALLSTACKDOWN $amt",
+ [(ARMcallseq_start imm:$amt)]>, Imp<[SP],[SP]>;
+
+def DWARF_LOC :
+PseudoInst<(ops i32imm:$line, i32imm:$col, i32imm:$file),
+ ".loc $file, $line, $col",
+ [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>;
+
+def DWARF_LABEL :
+PseudoInst<(ops i32imm:$id),
+ "\nLdebug_loc${id:no_hash}:",
+ [(dwarf_label (i32 imm:$id))]>;
+
+def PICADD : AI1<(ops GPR:$dst, GPR:$a, pclabel:$cp),
+ "\n$cp:\n\tadd $dst, pc, $a",
+ [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+let AddedComplexity = 10 in
+def PICLD : AI2<(ops GPR:$dst, addrmodepc:$addr),
+ "\n${addr:label}:\n\tldr $dst, $addr",
+ [(set GPR:$dst, (load addrmodepc:$addr))]>;
-// Operand for printing out a condition code.
-let PrintMethod = "printCCOperand" in
- def CCOp : Operand<i32>;
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
-def SDT_ARMCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
+let isReturn = 1, isTerminator = 1 in
+ def BX_RET : AI<(ops), "bx lr", [(ARMretflag)]>;
+
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+ def LDM_RET : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops),
+ "ldm${addr:submode} $addr, $dst1",
+ []>;
+
+let isCall = 1, noResults = 1,
+ Defs = [R0, R1, R2, R3, R12, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def BL : AI<(ops i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def BLX : AI<(ops GPR:$dst, variable_ops),
+ "blx $dst",
+ [(ARMcall GPR:$dst)]>, Requires<[IsARM, HasV5T]>;
+ // ARMv4T
+ def BX : AIx2<(ops GPR:$dst, variable_ops),
+ "mov lr, pc\n\tbx $dst",
+ [(ARMcall_nolink GPR:$dst)]>;
+}
-def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def retflag : SDNode<"ARMISD::RET_FLAG", SDTRet,
- [SDNPHasChain, SDNPOptInFlag]>;
-
-def SDTarmselect : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
-def armselect : SDNode<"ARMISD::SELECT", SDTarmselect, [SDNPInFlag, SDNPOutFlag]>;
-
-def SDTarmfmstat : SDTypeProfile<0, 0, []>;
-def armfmstat : SDNode<"ARMISD::FMSTAT", SDTarmfmstat, [SDNPInFlag, SDNPOutFlag]>;
-
-def SDTarmbr : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
-def armbr : SDNode<"ARMISD::BR", SDTarmbr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
-def SDTVoidBinOp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
-def armcmp : SDNode<"ARMISD::CMP", SDTVoidBinOp, [SDNPOutFlag]>;
-
-def armfsitos : SDNode<"ARMISD::FSITOS", SDTUnaryOp>;
-def armftosis : SDNode<"ARMISD::FTOSIS", SDTUnaryOp>;
-def armfsitod : SDNode<"ARMISD::FSITOD", SDTUnaryOp>;
-def armftosid : SDNode<"ARMISD::FTOSID", SDTUnaryOp>;
-def armfuitos : SDNode<"ARMISD::FUITOS", SDTUnaryOp>;
-def armftouis : SDNode<"ARMISD::FTOUIS", SDTUnaryOp>;
-def armfuitod : SDNode<"ARMISD::FUITOD", SDTUnaryOp>;
-def armftouid : SDNode<"ARMISD::FTOUID", SDTUnaryOp>;
-
-def SDTarmfmrrd : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisFP<2>]>;
-def armfmrrd : SDNode<"ARMISD::FMRRD", SDTarmfmrrd,
- [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-
-def SDTarmfmdrr : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>;
-def armfmdrr : SDNode<"ARMISD::FMDRR", SDTarmfmdrr, []>;
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def B : AI<(ops brtarget:$dst), "b $dst",
+ [(br bb:$dst)]>;
+
+ def BR_JTr : JTI<(ops GPR:$dst, jtblock_operand:$jt, i32imm:$id),
+ "mov pc, $dst \n$jt",
+ [(ARMbrjt GPR:$dst, tjumptable:$jt, imm:$id)]>;
+ def BR_JTm : JTI2<(ops addrmode2:$dst, jtblock_operand:$jt, i32imm:$id),
+ "ldr pc, $dst \n$jt",
+ [(ARMbrjt (i32 (load addrmode2:$dst)), tjumptable:$jt,
+ imm:$id)]>;
+ def BR_JTadd : JTI1<(ops GPR:$dst, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+ "add pc, $dst, $idx \n$jt",
+ [(ARMbrjt (add GPR:$dst, GPR:$idx), tjumptable:$jt,
+ imm:$id)]>;
+}
-def ADJCALLSTACKUP : InstARM<(ops i32imm:$amt),
- "!ADJCALLSTACKUP $amt",
- [(callseq_end imm:$amt)]>, Imp<[R13],[R13]>;
-
-def ADJCALLSTACKDOWN : InstARM<(ops i32imm:$amt),
- "!ADJCALLSTACKDOWN $amt",
- [(callseq_start imm:$amt)]>, Imp<[R13],[R13]>;
-
-def IMPLICIT_DEF_Int : InstARM<(ops IntRegs:$dst),
- "@IMPLICIT_DEF $dst",
- [(set IntRegs:$dst, (undef))]>;
-def IMPLICIT_DEF_FP : InstARM<(ops FPRegs:$dst), "@IMPLICIT_DEF $dst",
- [(set FPRegs:$dst, (undef))]>;
-def IMPLICIT_DEF_DFP : InstARM<(ops DFPRegs:$dst), "@IMPLICIT_DEF $dst",
- [(set DFPRegs:$dst, (undef))]>;
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
+ def Bcc : AI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
+ [(ARMbrcond bb:$dst, imm:$cc)]>;
-let isReturn = 1 in {
- def bx: InstARM<(ops), "bx r14", [(retflag)]>;
-}
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
-let noResults = 1, Defs = [R0, R1, R2, R3, R14] in {
- def bl: InstARM<(ops i32imm:$func, variable_ops), "bl $func", []>;
- def blx : InstARM<(ops IntRegs:$func, variable_ops), "blx $func", [(ARMcall IntRegs:$func)]>;
-}
+// Load
+let isLoad = 1 in {
+def LDR : AI2<(ops GPR:$dst, addrmode2:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+// Loads with zero extension
+def LDRH : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
+
+def LDRB : AI2<(ops GPR:$dst, addrmode2:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+
+// Loads with sign extension
+def LDRSH : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldrsh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldrsb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
+
+// Load doubleword
+def LDRD : AI3<(ops GPR:$dst, addrmode3:$addr),
+ "ldrd $dst, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed loads
+def LDR_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+ "ldr $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDR_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base, am2offset:$offset),
+ "ldr $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldrh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRH_POST : AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldrh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRB_PRE : AI2pr<(ops GPR:$dst, GPR:$base_wb, addrmode2:$addr),
+ "ldrb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRB_POST : AI2po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am2offset:$offset),
+ "ldrb $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSH_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldrsh $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSH_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldrsh $dst, [$base], $offset", "$base = $base_wb", []>;
+
+def LDRSB_PRE : AI3pr<(ops GPR:$dst, GPR:$base_wb, addrmode3:$addr),
+ "ldrsb $dst, $addr!", "$addr.base = $base_wb", []>;
+
+def LDRSB_POST: AI3po<(ops GPR:$dst, GPR:$base_wb, GPR:$base,am3offset:$offset),
+ "ldrsb $dst, [$base], $offset", "$base = $base_wb", []>;
+} // isLoad
+
+// Store
+let isStore = 1 in {
+def STR : AI2<(ops GPR:$src, addrmode2:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, addrmode2:$addr)]>;
+
+// Stores with truncate
+def STRH : AI3<(ops GPR:$src, addrmode3:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
+
+def STRB : AI2<(ops GPR:$src, addrmode2:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+
+// Store doubleword
+def STRD : AI3<(ops GPR:$src, addrmode3:$addr),
+ "strd $src, $addr",
+ []>, Requires<[IsARM, HasV5T]>;
+
+// Indexed stores
+def STR_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base, am2offset:$offset),
+ "str $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STR_POST : AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "str $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
+
+def STRH_PRE : AI3pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+ "strh $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb,
+ (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
+
+def STRH_POST: AI3po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am3offset:$offset),
+ "strh $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
+ GPR:$base, am3offset:$offset))]>;
+
+def STRB_PRE : AI2pr<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "strb $src, [$base, $offset]!", "$base = $base_wb",
+ [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+
+def STRB_POST: AI2po<(ops GPR:$base_wb, GPR:$src, GPR:$base,am2offset:$offset),
+ "strb $src, [$base], $offset", "$base = $base_wb",
+ [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
+ GPR:$base, am2offset:$offset))]>;
+} // isStore
-def LDR : InstARM<(ops IntRegs:$dst, op_addr_mode2:$addr),
- "ldr $dst, $addr",
- [(set IntRegs:$dst, (load addr_mode2:$addr))]>;
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
-def LDRB : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
- "ldrb $dst, [$addr]",
- [(set IntRegs:$dst, (zextloadi8 IntRegs:$addr))]>;
+let isLoad = 1 in
+def LDM : AI4<(ops addrmode4:$addr, reglist:$dst1, variable_ops),
+ "ldm${addr:submode} $addr, $dst1",
+ []>;
-def LDRSB : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
- "ldrsb $dst, [$addr]",
- [(set IntRegs:$dst, (sextloadi8 IntRegs:$addr))]>;
+let isStore = 1 in
+def STM : AI4<(ops addrmode4:$addr, reglist:$src1, variable_ops),
+ "stm${addr:submode} $addr, $src1",
+ []>;
-def LDRH : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
- "ldrh $dst, [$addr]",
- [(set IntRegs:$dst, (zextloadi16 IntRegs:$addr))]>;
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
-def LDRSH : InstARM<(ops IntRegs:$dst, IntRegs:$addr),
- "ldrsh $dst, [$addr]",
- [(set IntRegs:$dst, (sextloadi16 IntRegs:$addr))]>;
+def MOVrr : AI1<(ops GPR:$dst, GPR:$src),
+ "mov $dst, $src", []>;
+def MOVrs : AI1<(ops GPR:$dst, so_reg:$src),
+ "mov $dst, $src", [(set GPR:$dst, so_reg:$src)]>;
+def MOVri : AI1<(ops GPR:$dst, so_imm:$src),
+ "mov $dst, $src", [(set GPR:$dst, so_imm:$src)]>;
-def STR : InstARM<(ops IntRegs:$src, op_addr_mode2:$addr),
- "str $src, $addr",
- [(store IntRegs:$src, addr_mode2:$addr)]>;
+// These aren't really mov instructions, but we have to define them this way
+// because they read or write the carry flag.
-def STRB : InstARM<(ops IntRegs:$src, IntRegs:$addr),
- "strb $src, [$addr]",
- [(truncstorei8 IntRegs:$src, IntRegs:$addr)]>;
+def MOVsrl_flag : AI1<(ops GPR:$dst, GPR:$src),
+ "movs $dst, $src, lsr #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>;
+def MOVsra_flag : AI1<(ops GPR:$dst, GPR:$src),
+ "movs $dst, $src, asr #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>;
+def MOVrrx : AI1<(ops GPR:$dst, GPR:$src),
+ "mov $dst, $src, rrx",
+ [(set GPR:$dst, (ARMrrx GPR:$src))]>;
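+// "movs ..., lsr #1" shifts the low bit into the carry flag; ARMsrl_flag and
+// ARMsra_flag model that carry output, and MOVrrx rotates it back in. A sketch
+// of how a 64-bit logical shift right by one could be stitched together
+// (assuming the lowering pairs one flag-setting shift with one rrx):
+//   movs r1, r1, lsr #1   @ hi >>= 1, carry = old bit 0 of hi
+//   mov  r0, r0, rrx      @ lo = (carry << 31) | (lo >> 1)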
-def STRH : InstARM<(ops IntRegs:$src, IntRegs:$addr),
- "strh $src, [$addr]",
- [(truncstorei16 IntRegs:$src, IntRegs:$addr)]>;
-def MOV : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src),
- "mov $dst, $src", [(set IntRegs:$dst, addr_mode1:$src)]>;
+//===----------------------------------------------------------------------===//
+// Extend Instructions.
+//
-def MVN : InstARM<(ops IntRegs:$dst, op_addr_mode1:$src),
- "mvn $dst, $src", [(set IntRegs:$dst, (not addr_mode1:$src))]>;
+// Sign extenders
-def ADD : Addr1BinOp<"add", add>;
-def ADCS : Addr1BinOp<"adcs", adde>;
-def ADDS : Addr1BinOp<"adds", addc>;
-def SUB : Addr1BinOp<"sub", sub>;
-def SBCS : Addr1BinOp<"sbcs", sube>;
-def SUBS : Addr1BinOp<"subs", subc>;
-def AND : Addr1BinOp<"and", and>;
-def EOR : Addr1BinOp<"eor", xor>;
-def ORR : Addr1BinOp<"orr", or>;
+defm SXTB : AI_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
-let isTwoAddress = 1 in {
- def movcond : InstARM<(ops IntRegs:$dst, IntRegs:$false,
- op_addr_mode1:$true, CCOp:$cc),
- "mov$cc $dst, $true",
- [(set IntRegs:$dst, (armselect addr_mode1:$true,
- IntRegs:$false, imm:$cc))]>;
+defm SXTAB : AI_bin_rrot<"sxtab",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm SXTAH : AI_bin_rrot<"sxtah",
+ BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
- def fcpyscond : InstARM<(ops FPRegs:$dst, FPRegs:$false,
- FPRegs:$true, CCOp:$cc),
- "fcpys$cc $dst, $true",
- [(set FPRegs:$dst, (armselect FPRegs:$true,
- FPRegs:$false, imm:$cc))]>;
+// TODO: SXT(A){B|H}16
- def fcpydcond : InstARM<(ops DFPRegs:$dst, DFPRegs:$false,
- DFPRegs:$true, CCOp:$cc),
- "fcpyd$cc $dst, $true",
- [(set DFPRegs:$dst, (armselect DFPRegs:$true,
- DFPRegs:$false, imm:$cc))]>;
-}
+// Zero extenders
-def MUL : IntBinOp<"mul", mul>;
+let AddedComplexity = 16 in {
+defm UXTB : AI_unary_rrot<"uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_unary_rrot<"uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-let Defs = [R0] in {
- def SMULL : IntBinOp<"smull r12,", mulhs>;
- def UMULL : IntBinOp<"umull r12,", mulhu>;
+def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 24)>;
+def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF),
+ (UXTB16r_rot GPR:$Src, 8)>;
+
+defm UXTAB : AI_bin_rrot<"uxtab",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm UXTAH : AI_bin_rrot<"uxtah",
+ BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
-let isTerminator = 1, isBranch = 1 in {
- def bcond : InstARM<(ops brtarget:$dst, CCOp:$cc),
- "b$cc $dst",
- [(armbr bb:$dst, imm:$cc)]>;
+// This isn't safe in general: uxtab16 performs two independent 16-bit adds,
+// not a single 32-bit add.
+//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
- def b : InstARM<(ops brtarget:$dst),
- "b $dst",
- [(br bb:$dst)]>;
-}
+// TODO: UXT(A){B|H}16
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+defm ADD : AI1_bin_irs<"add" , BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm ADDS : AI1_bin_irs<"adds", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm ADC : AI1_bin_irs<"adc" , BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm SUB : AI1_bin_irs<"sub" , BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+defm SUBS : AI1_bin_irs<"subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm SBC : AI1_bin_irs<"sbc" , BinOpFrag<(sube node:$LHS, node:$RHS)>>;
-def cmp : InstARM<(ops IntRegs:$a, op_addr_mode1:$b),
- "cmp $a, $b",
- [(armcmp IntRegs:$a, addr_mode1:$b)]>;
+// These don't define reg/reg forms, because they are handled above.
+defm RSB : AI1_bin_is <"rsb" , BinOpFrag<(sub node:$RHS, node:$LHS)>>;
+defm RSBS : AI1_bin_is <"rsbs", BinOpFrag<(subc node:$RHS, node:$LHS)>>;
+defm RSC : AI1_bin_is <"rsc" , BinOpFrag<(sube node:$RHS, node:$LHS)>>;
-// Floating Point Compare
-def fcmps : InstARM<(ops FPRegs:$a, FPRegs:$b),
- "fcmps $a, $b",
- [(armcmp FPRegs:$a, FPRegs:$b)]>;
+// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
+ (SUBri GPR:$src, so_imm_neg:$imm)>;
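+// For instance (illustrative): "x - 1" reaches the selector as (add x, -1);
+// the pattern above turns it back into "sub $dst, $src, #1".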
-def fcmpd : InstARM<(ops DFPRegs:$a, DFPRegs:$b),
- "fcmpd $a, $b",
- [(armcmp DFPRegs:$a, DFPRegs:$b)]>;
+//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+// (SUBSri GPR:$src, so_imm_neg:$imm)>;
+//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm),
+// (SBCri GPR:$src, so_imm_neg:$imm)>;
-// Floating Point Copy
-def FCPYS : InstARM<(ops FPRegs:$dst, FPRegs:$src), "fcpys $dst, $src", []>;
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
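+// A sketch of the intended expansions (illustrative values only):
+//   mul r0, r1, #9   =>  add r0, r1, r1, lsl #3   @ 9 = 2^3+1
+//   mul r0, r1, #7   =>  rsb r0, r1, r1, lsl #3   @ 7 = 2^3-1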
-def FCPYD : InstARM<(ops DFPRegs:$dst, DFPRegs:$src), "fcpyd $dst, $src", []>;
-// Floating Point Conversion
-// We use bitconvert for moving the data between the register classes.
-// The format conversion is done with ARM specific nodes
+//===----------------------------------------------------------------------===//
+// Bitwise Instructions.
+//
-def FMSR : InstARM<(ops FPRegs:$dst, IntRegs:$src),
- "fmsr $dst, $src", [(set FPRegs:$dst, (bitconvert IntRegs:$src))]>;
+defm AND : AI1_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm ORR : AI1_bin_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>>;
+defm EOR : AI1_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm BIC : AI1_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-def FMRS : InstARM<(ops IntRegs:$dst, FPRegs:$src),
- "fmrs $dst, $src", [(set IntRegs:$dst, (bitconvert FPRegs:$src))]>;
+defm MVN : AI1_unary_irs<"mvn", not>;
-def FMRRD : InstARM<(ops IntRegs:$i0, IntRegs:$i1, DFPRegs:$src),
- "fmrrd $i0, $i1, $src", [(armfmrrd IntRegs:$i0, IntRegs:$i1, DFPRegs:$src)]>;
+def : ARMPat<(i32 so_imm_not:$imm),
+ (MVNi so_imm_not:$imm)>;
-def FMDRR : InstARM<(ops DFPRegs:$dst, IntRegs:$i0, IntRegs:$i1),
- "fmdrr $dst, $i0, $i1", [(set DFPRegs:$dst, (armfmdrr IntRegs:$i0, IntRegs:$i1))]>;
+def : ARMPat<(and GPR:$src, so_imm_not:$imm),
+ (BICri GPR:$src, so_imm_not:$imm)>;
-def FSITOS : InstARM<(ops FPRegs:$dst, FPRegs:$src),
- "fsitos $dst, $src", [(set FPRegs:$dst, (armfsitos FPRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+// Multiply Instructions.
+//
-def FTOSIS : InstARM<(ops FPRegs:$dst, FPRegs:$src),
- "ftosis $dst, $src", [(set FPRegs:$dst, (armftosis FPRegs:$src))]>;
+// AI_orr - Defines a (op r, r) pattern.
+class AI_orr<string opc, SDNode opnode>
+ : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, " $dst, $a, $b"),
+ [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+
+// AI_oorr - Defines a (op (op r, r), r) pattern.
+class AI_oorr<string opc, SDNode opnode1, SDNode opnode2>
+ : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+ !strconcat(opc, " $dst, $a, $b, $c"),
+ [(set GPR:$dst, (opnode1 (opnode2 GPR:$a, GPR:$b), GPR:$c))]>;
+
+def MUL : AI_orr<"mul", mul>;
+def MLA : AI_oorr<"mla", add, mul>;
+
+// Extra precision multiplies with low / high results
+def SMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "smull $ldst, $hdst, $a, $b",
+ []>;
+
+def UMULL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umull $ldst, $hdst, $a, $b",
+ []>;
+
+// Multiply + accumulate
+def SMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "smlal $ldst, $hdst, $a, $b",
+ []>;
+
+def UMLAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umlal $ldst, $hdst, $a, $b",
+ []>;
+
+def UMAAL : AI<(ops GPR:$ldst, GPR:$hdst, GPR:$a, GPR:$b),
+ "umaal $ldst, $hdst, $a, $b",
+ []>, Requires<[IsARM, HasV6]>;
+
+// Most significant word multiply
+def SMMUL : AI_orr<"smmul", mulhs>, Requires<[IsARM, HasV6]>;
+def SMMLA : AI_oorr<"smmla", add, mulhs>, Requires<[IsARM, HasV6]>;
+
+
+def SMMLS : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$c),
+ "smmls $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
+ Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+ def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16),
+ (sra (shl GPR:$b, 16), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode sext_16_node:$a, sext_16_node:$b))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bt $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bt $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra (shl GPR:$a, 16), 16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "bt $dst, $a, $b"),
+ [(set GPR:$dst, (opnode sext_16_node:$a, (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sra (shl GPR:$b, 16), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tb $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16), sext_16_node:$b))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "tt $dst, $a, $b"),
+ [(set GPR:$dst, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wb $dst, $a, $b"),
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wb $dst, $a, $b"),
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra (shl GPR:$b, 16), 16)), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wb $dst, $a, $b"),
+ [(set GPR:$dst, (sra (opnode GPR:$a, sext_16_node:$b), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b),
+ !strconcat(opc, "wt $dst, $a, $b"),
+ [(set GPR:$dst, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
-def FSITOD : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
- "fsitod $dst, $src", [(set DFPRegs:$dst, (armfsitod FPRegs:$src))]>;
+multiclass AI_smla<string opc, PatFrag opnode> {
+ def BB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sext_inreg GPR:$a, i16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode (sra (shl GPR:$a, 16), 16),
+ (sra (shl GPR:$b, 16), 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc,
+ (opnode sext_16_node:$a, sext_16_node:$b)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra (shl GPR:$a, 16), 16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def BT3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "bt $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode sext_16_node:$a,
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sext_inreg GPR:$b, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sra (shl GPR:$b, 16), 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def TB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ sext_16_node:$b)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "tt $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16),
+ (sra GPR:$b, 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB1 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sext_inreg GPR:$b, i16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WB2 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra (shl GPR:$b, 16), 16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+ def WB3 : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wb $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ sext_16_node:$b), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AI<(ops GPR:$dst, GPR:$a, GPR:$b, GPR:$acc),
+ !strconcat(opc, "wt $dst, $a, $b, $acc"),
+ [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
+ (sra GPR:$b, 16)), 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+}
-def FTOSID : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
- "ftosid $dst, $src", [(set FPRegs:$dst, (armftosid DFPRegs:$src))]>;
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-def FUITOS : InstARM<(ops FPRegs:$dst, FPRegs:$src),
- "fuitos $dst, $src", [(set FPRegs:$dst, (armfuitos FPRegs:$src))]>;
+// TODO: Halfword multiply accumulate long: SMLAL<x><y>
+// TODO: Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-def FTOUIS : InstARM<(ops FPRegs:$dst, FPRegs:$src),
- "ftouis $dst, $src", [(set FPRegs:$dst, (armftouis FPRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
-def FUITOD : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
- "fuitod $dst, $src", [(set DFPRegs:$dst, (armfuitod FPRegs:$src))]>;
+def CLZ : AI<(ops GPR:$dst, GPR:$src),
+ "clz $dst, $src",
+ [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]>;
+
+def REV : AI<(ops GPR:$dst, GPR:$src),
+ "rev $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AI<(ops GPR:$dst, GPR:$src),
+ "rev16 $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]>;
+
+def REVSH : AI<(ops GPR:$dst, GPR:$src),
+ "revsh $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFFFF), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsARM, HasV6]>;
+
+def PKHBT : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhbt $dst, $src1, $src2, LSL $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
+ (and (shl GPR:$src2, (i32 imm:$shamt)),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
+ (PKHBT GPR:$src1, GPR:$src2, 0)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
+ (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+
+
+def PKHTB : AI<(ops GPR:$dst, GPR:$src1, GPR:$src2, i32imm:$shamt),
+ "pkhtb $dst, $src1, $src2, ASR $shamt",
+ [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
+ (and (sra GPR:$src2, imm16_31:$shamt),
+ 0xFFFF)))]>, Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+// a shift amount of 0 is *not legal* here; use PKHBT instead.
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)),
+ (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+ (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
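+// For instance (illustrative): (r1 & 0xFFFF0000) | (r2 >> 16) selects to
+//   pkhtb r0, r1, r2, ASR #16
+// since bits 15:0 of an arithmetic and a logical shift by 16 are identical.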
-def FTOUID : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
- "ftouid $dst, $src", [(set FPRegs:$dst, (armftouid DFPRegs:$src))]>;
-def FCVTDS : InstARM<(ops DFPRegs:$dst, FPRegs:$src),
- "fcvtds $dst, $src", [(set DFPRegs:$dst, (fextend FPRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+// Comparison Instructions...
+//
+
+defm CMP : AI1_bin0_irs<"cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm CMN : AI1_bin0_irs<"cmn", BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+// Note that TST/TEQ set only N, Z and the shifter carry, not the full
+// N, Z, C, V set that CMP does!
+def TSTrr : AI1<(ops GPR:$a, so_reg:$b), "tst $a, $b", []>;
+def TSTri : AI1<(ops GPR:$a, so_imm:$b), "tst $a, $b", []>;
+def TEQrr : AI1<(ops GPR:$a, so_reg:$b), "teq $a, $b", []>;
+def TEQri : AI1<(ops GPR:$a, so_imm:$b), "teq $a, $b", []>;
+
+// Conditional moves
+def MOVCCr : AI<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
+ "mov$cc $dst, $true",
+ [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+def MOVCCs : AI<(ops GPR:$dst, GPR:$false, so_reg:$true, CCOp:$cc),
+ "mov$cc $dst, $true",
+ [(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true,imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+def MOVCCi : AI<(ops GPR:$dst, GPR:$false, so_imm:$true, CCOp:$cc),
+ "mov$cc $dst, $true",
+ [(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true,imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AI1<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
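+// With a hypothetical label "foo" and uid 0, the string above expands roughly
+// to:
+//   .set PCRELV0, (foo-(PCRELL0+8))
+//   PCRELL0:
+//       add r0, pc, #PCRELV0
+// The +8 accounts for ARM-mode pc reading as the current instruction plus 8.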
+
+def LEApcrelJT : AI1<(ops GPR:$dst, i32imm:$label, i32imm:$id),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
-def FCVTSD : InstARM<(ops FPRegs:$dst, DFPRegs:$src),
- "fcvtsd $dst, $src", [(set FPRegs:$dst, (fround DFPRegs:$src))]>;
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
-def FMSTAT : InstARM<(ops ), "fmstat", [(armfmstat)]>;
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
-// Floating Point Arithmetic
-def FADDS : FPBinOp<"fadds", fadd>;
-def FADDD : DFPBinOp<"faddd", fadd>;
-def FSUBS : FPBinOp<"fsubs", fsub>;
-def FSUBD : DFPBinOp<"fsubd", fsub>;
+// Large immediate handling.
-def FNEGS : FPUnaryOp<"fnegs", fneg>;
-def FNEGD : DFPUnaryOp<"fnegd", fneg>;
-def FABSS : FPUnaryOp<"fabss", fabs>;
-def FABSD : DFPUnaryOp<"fabsd", fabs>;
+// Two piece so_imms.
+def : ARMPat<(i32 so_imm2part:$src),
+ (ORRri (MOVri (so_imm2part_1 imm:$src)),
+ (so_imm2part_2 imm:$src))>;
-def FMULS : FPBinOp<"fmuls", fmul>;
-def FMULD : DFPBinOp<"fmuld", fmul>;
-def FDIVS : FPBinOp<"fdivs", fdiv>;
-def FDIVD : DFPBinOp<"fdivd", fdiv>;
+def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
+ (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
+def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
+ (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
+ (so_imm2part_2 imm:$RHS))>;
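+// For instance (illustrative): 0xF00F is not a valid so_imm, but it splits
+// into the rotated-immediate pieces 0xF000 and 0x00F:
+//   mov r0, #0xF000
+//   orr r0, r0, #0x00F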
-// Floating Point Load
-def FLDS : InstARM<(ops FPRegs:$dst, op_addr_mode5:$addr),
- "flds $dst, $addr",
- [(set FPRegs:$dst, (load addr_mode5:$addr))]>;
+// TODO: add/sub/and 3-instr forms?
-def FLDD : InstARM<(ops DFPRegs:$dst, op_addr_mode5:$addr),
- "fldd $dst, $addr",
- [(set DFPRegs:$dst, (load addr_mode5:$addr))]>;
-// Floating Point Store
-def FSTS : InstARM<(ops FPRegs:$src, op_addr_mode5:$addr),
- "fsts $src, $addr",
- [(store FPRegs:$src, addr_mode5:$addr)]>;
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
-def FSTD : InstARM<(ops DFPRegs:$src, op_addr_mode5:$addr),
- "fstd $src, $addr",
- [(store DFPRegs:$src, addr_mode5:$addr)]>;
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
-def : Pat<(ARMcall tglobaladdr:$dst),
- (bl tglobaladdr:$dst)>;
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>;
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
-def : Pat<(ARMcall texternalsym:$dst),
- (bl texternalsym:$dst)>;
+// truncstore i1 -> truncstore i8
+def : Pat<(truncstorei1 GPR:$src, addrmode2:$dst),
+ (STRB GPR:$src, addrmode2:$dst)>;
+def : Pat<(pre_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
+ (STRB_PRE GPR:$src, GPR:$base, am2offset:$offset)>;
+def : Pat<(post_truncsti1 GPR:$src, GPR:$base, am2offset:$offset),
+ (STRB_POST GPR:$src, GPR:$base, am2offset:$offset)>;
-def : Pat<(extloadi8 IntRegs:$addr),
- (LDRB IntRegs:$addr)>;
-def : Pat<(extloadi16 IntRegs:$addr),
- (LDRH IntRegs:$addr)>;
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
-// extload bool -> extload byte
-def : Pat<(extloadi1 IntRegs:$addr), (LDRB IntRegs:$addr)>;
+include "ARMInstrThumb.td"
-// zextload bool -> zextload byte
-def : Pat<(i32 (zextloadi1 IntRegs:$addr)), (LDRB IntRegs:$addr)>;
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
-// truncstore bool -> truncstore byte.
-def : Pat<(truncstorei1 IntRegs:$src, IntRegs:$addr),
- (STRB IntRegs:$src, IntRegs:$addr)>;
+include "ARMInstrVFP.td"
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 0000000000..58cef04188
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,513 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+// TI - Thumb instruction.
+
+// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb, HasV5T];
+}
+
+class ThumbI<dag ops, AddrMode am, SizeFlagVal sz,
+ string asm, string cstr, list<dag> pattern>
+ // FIXME: Set all opcodes to 0 for now.
+ : InstARM<0, am, sz, IndexModeNone, ops, asm, cstr> {
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb];
+}
+
+class TI<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class TI1<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT1, Size2Bytes, asm, "", pattern>;
+class TI2<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT2, Size2Bytes, asm, "", pattern>;
+class TI4<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeT4, Size2Bytes, asm, "", pattern>;
+class TIs<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeTs, Size2Bytes, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+// BL and BLX(1) are translated by the assembler into two instructions.
+class TIx2<dag ops, string asm, list<dag> pattern>
+ : ThumbI<ops, AddrModeNone, Size4Bytes, asm, "", pattern>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-(int)N->getValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(~((uint32_t)N->getValue()), MVT::i32);
+}]>;
+
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+ return ~((uint32_t)N->getValue()) < 256;
+}]>;
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getValue() >= 8 && (uint32_t)N->getValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+ unsigned Val = -N->getValue();
+ return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break immediates up into two pieces: an immediate + a left shift.
+// This uses thumb_immshifted to match and thumb_immshifted_val and
+// thumb_immshifted_shamt to get the val/shift pieces.
+def thumb_immshifted : PatLeaf<(imm), [{
+ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+ unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getValue());
+ return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
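+// For instance (illustrative): 0x12000 = 0x48 << 10, so it can be built as
+//   mov r0, #0x48
+//   lsl r0, r0, #10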
+
+// Define Thumb specific addressing modes.
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+ let PrintMethod = "printThumbAddrModeRROperand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg);
+}
+
+// t_addrmode_ri5_{1|2|4} := reg + imm5 * {1|2|4}
+//
+def t_addrmode_ri5_1 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_1", []> {
+ let PrintMethod = "printThumbAddrModeRI5_1Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+def t_addrmode_ri5_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_2", []> {
+ let PrintMethod = "printThumbAddrModeRI5_2Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+def t_addrmode_ri5_4 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeRI5_4", []> {
+ let PrintMethod = "printThumbAddrModeRI5_4Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+ let PrintMethod = "printThumbAddrModeSPOperand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+def tPICADD : TIt<(ops GPR:$dst, GPR:$lhs, pclabel:$cp),
+ "\n$cp:\n\tadd $dst, pc",
+ [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions.
+//
+
+let isReturn = 1, isTerminator = 1 in
+ def tBX_RET : TI<(ops), "bx lr", [(ARMretflag)]>;
+
+// FIXME: remove when we have a way of marking an MI with these properties.
+let isLoad = 1, isReturn = 1, isTerminator = 1 in
+def tPOP_RET : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isCall = 1, noResults = 1,
+ Defs = [R0, R1, R2, R3, LR,
+ D0, D1, D2, D3, D4, D5, D6, D7] in {
+ def tBL : TIx2<(ops i32imm:$func, variable_ops),
+ "bl ${func:call}",
+ [(ARMtcall tglobaladdr:$func)]>;
+ // ARMv5T and above
+ def tBLXi : TIx2<(ops i32imm:$func, variable_ops),
+ "blx ${func:call}",
+ [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>;
+ def tBLXr : TI<(ops GPR:$dst, variable_ops),
+ "blx $dst",
+ [(ARMtcall GPR:$dst)]>, Requires<[HasV5T]>;
+ // ARMv4T
+ def tBX : TIx2<(ops GPR:$dst, variable_ops),
+ "cpy lr, pc\n\tbx $dst",
+ [(ARMcall_nolink GPR:$dst)]>;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+ def tB : TI<(ops brtarget:$dst), "b $dst", [(br bb:$dst)]>;
+
+let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in
+ def tBcc : TI<(ops brtarget:$dst, CCOp:$cc), "b$cc $dst",
+ [(ARMbrcond bb:$dst, imm:$cc)]>;
+
+//===----------------------------------------------------------------------===//
+// Load Store Instructions.
+//
+
+let isLoad = 1 in {
+def tLDRri : TI4<(ops GPR:$dst, t_addrmode_ri5_4:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_ri5_4:$addr))]>;
+
+def tLDRrr : TI<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_rr:$addr))]>;
+// def tLDRpci
+def tLDRspi : TIs<(ops GPR:$dst, t_addrmode_sp:$addr),
+ "ldr $dst, $addr",
+ [(set GPR:$dst, (load t_addrmode_sp:$addr))]>;
+
+def tLDRBri : TI1<(ops GPR:$dst, t_addrmode_ri5_1:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 t_addrmode_ri5_1:$addr))]>;
+
+def tLDRBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrb $dst, $addr",
+ [(set GPR:$dst, (zextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRHri : TI2<(ops GPR:$dst, t_addrmode_ri5_2:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 t_addrmode_ri5_2:$addr))]>;
+
+def tLDRHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrh $dst, $addr",
+ [(set GPR:$dst, (zextloadi16 t_addrmode_rr:$addr))]>;
+
+def tLDRSBrr : TI1<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsb $dst, $addr",
+ [(set GPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
+
+def tLDRSHrr : TI2<(ops GPR:$dst, t_addrmode_rr:$addr),
+ "ldrsh $dst, $addr",
+ [(set GPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
+} // isLoad
+
+let isStore = 1 in {
+def tSTRri : TI4<(ops GPR:$src, t_addrmode_ri5_4:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_ri5_4:$addr)]>;
+
+def tSTRrr : TI<(ops GPR:$src, t_addrmode_rr:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_rr:$addr)]>;
+
+def tSTRspi : TIs<(ops GPR:$src, t_addrmode_sp:$addr),
+ "str $src, $addr",
+ [(store GPR:$src, t_addrmode_sp:$addr)]>;
+
+def tSTRBri : TI1<(ops GPR:$src, t_addrmode_ri5_1:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, t_addrmode_ri5_1:$addr)]>;
+
+def tSTRBrr : TI1<(ops GPR:$src, t_addrmode_rr:$addr),
+ "strb $src, $addr",
+ [(truncstorei8 GPR:$src, t_addrmode_rr:$addr)]>;
+
+def tSTRHri : TI2<(ops GPR:$src, t_addrmode_ri5_2:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, t_addrmode_ri5_2:$addr)]>;
+
+def tSTRHrr : TI2<(ops GPR:$src, t_addrmode_rr:$addr),
+ "strh $src, $addr",
+ [(truncstorei16 GPR:$src, t_addrmode_rr:$addr)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+// TODO: A7-44: LDMIA - load multiple
+
+let isLoad = 1 in
+def tPOP : TI<(ops reglist:$dst1, variable_ops),
+ "pop $dst1", []>;
+
+let isStore = 1 in
+def tPUSH : TI<(ops reglist:$src1, variable_ops),
+ "push $src1", []>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+def tADDi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_7:$rhs))]>;
+
+def tADDi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "add $dst, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm8_255:$rhs))]>;
+
+def tADDrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, GPR:$rhs))]>;
+
+def tADDhirr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "add $dst, $rhs", []>;
+
+def tADDrPCi : TI<(ops GPR:$dst, i32imm:$rhs),
+ "add $dst, pc, $rhs * 4", []>;
+def tADDrSPi : TI<(ops GPR:$dst, GPR:$sp, i32imm:$rhs),
+ "add $dst, $sp, $rhs * 4", []>;
+def tADDspi : TI<(ops GPR:$sp, i32imm:$rhs),
+ "add $sp, $rhs * 4", []>;
+
+
+def tAND : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "and $dst, $rhs",
+ [(set GPR:$dst, (and GPR:$lhs, GPR:$rhs))]>;
+
+def tASRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "asr $dst, $lhs, $rhs",
+ [(set GPR:$dst, (sra GPR:$lhs, imm:$rhs))]>;
+
+def tASRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "asr $dst, $rhs",
+ [(set GPR:$dst, (sra GPR:$lhs, GPR:$rhs))]>;
+
+def tBIC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "bic $dst, $rhs",
+ [(set GPR:$dst, (and GPR:$lhs, (not GPR:$rhs)))]>;
+
+
+def tCMN : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmn $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, (ineg GPR:$rhs))]>;
+
+def tCMPi8 : TI<(ops GPR:$lhs, i32imm:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, imm0_255:$rhs)]>;
+
+def tCMPr : TI<(ops GPR:$lhs, GPR:$rhs),
+ "cmp $lhs, $rhs",
+ [(ARMcmp GPR:$lhs, GPR:$rhs)]>;
+
+// TODO: A7-37: CMP(3) - cmp hi regs
+
+def tEOR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "eor $dst, $rhs",
+ [(set GPR:$dst, (xor GPR:$lhs, GPR:$rhs))]>;
+
+def tLSLri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "lsl $dst, $lhs, $rhs",
+ [(set GPR:$dst, (shl GPR:$lhs, imm:$rhs))]>;
+
+def tLSLrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "lsl $dst, $rhs",
+ [(set GPR:$dst, (shl GPR:$lhs, GPR:$rhs))]>;
+
+def tLSRri : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "lsr $dst, $lhs, $rhs",
+ [(set GPR:$dst, (srl GPR:$lhs, imm:$rhs))]>;
+
+def tLSRrr : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "lsr $dst, $rhs",
+ [(set GPR:$dst, (srl GPR:$lhs, GPR:$rhs))]>;
+
+def tMOVri8 : TI<(ops GPR:$dst, i32imm:$src),
+ "mov $dst, $src",
+ [(set GPR:$dst, imm0_255:$src)]>;
+
+// TODO: A7-73: MOV(2) - mov setting flag.
+
+
+// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
+// which is MOV(3). This also supports high registers.
+def tMOVrr : TI<(ops GPR:$dst, GPR:$src),
+ "cpy $dst, $src", []>;
+
+def tMUL : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "mul $dst, $rhs",
+ [(set GPR:$dst, (mul GPR:$lhs, GPR:$rhs))]>;
+
+def tMVN : TI<(ops GPR:$dst, GPR:$src),
+ "mvn $dst, $src",
+ [(set GPR:$dst, (not GPR:$src))]>;
+
+def tNEG : TI<(ops GPR:$dst, GPR:$src),
+ "neg $dst, $src",
+ [(set GPR:$dst, (ineg GPR:$src))]>;
+
+def tORR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "orr $dst, $rhs",
+ [(set GPR:$dst, (or GPR:$lhs, GPR:$rhs))]>;
+
+
+def tREV : TI<(ops GPR:$dst, GPR:$src),
+ "rev $dst, $src",
+ [(set GPR:$dst, (bswap GPR:$src))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREV16 : TI<(ops GPR:$dst, GPR:$src),
+ "rev16 $dst, $src",
+ [(set GPR:$dst,
+ (or (and (srl GPR:$src, 8), 0xFF),
+ (or (and (shl GPR:$src, 8), 0xFF00),
+ (or (and (srl GPR:$src, 8), 0xFF0000),
+ (and (shl GPR:$src, 8), 0xFF000000)))))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tREVSH : TI<(ops GPR:$dst, GPR:$src),
+ "revsh $dst, $src",
+ [(set GPR:$dst,
+ (sext_inreg
+ (or (srl (and GPR:$src, 0xFFFF), 8),
+ (shl GPR:$src, 8)), i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+def tROR : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "ror $dst, $rhs",
+ [(set GPR:$dst, (rotr GPR:$lhs, GPR:$rhs))]>;
+
+def tSBC : TIt<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "sbc $dst, $rhs",
+ [(set GPR:$dst, (sube GPR:$lhs, GPR:$rhs))]>;
+
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBi3 : TI<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_7_neg:$rhs))]>;
+
+def tSUBi8 : TIt<(ops GPR:$dst, GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm8_255_neg:$rhs))]>;
+
+def tSUBrr : TI<(ops GPR:$dst, GPR:$lhs, GPR:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (sub GPR:$lhs, GPR:$rhs))]>;
+
+def tSUBspi : TI<(ops GPR:$sp, i32imm:$rhs),
+ "sub $sp, $rhs * 4", []>;
+
+def tSXTB : TI<(ops GPR:$dst, GPR:$src),
+ "sxtb $dst, $src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i8))]>,
+ Requires<[IsThumb, HasV6]>;
+def tSXTH : TI<(ops GPR:$dst, GPR:$src),
+ "sxth $dst, $src",
+ [(set GPR:$dst, (sext_inreg GPR:$src, i16))]>,
+ Requires<[IsThumb, HasV6]>;
+
+// TODO: A7-122: TST - test.
+
+def tUXTB : TI<(ops GPR:$dst, GPR:$src),
+ "uxtb $dst, $src",
+ [(set GPR:$dst, (and GPR:$src, 0xFF))]>,
+ Requires<[IsThumb, HasV6]>;
+def tUXTH : TI<(ops GPR:$dst, GPR:$src),
+ "uxth $dst, $src",
+ [(set GPR:$dst, (and GPR:$src, 0xFFFF))]>,
+ Requires<[IsThumb, HasV6]>;
+
+
+// tMOVCCr - Conditional move, used to implement the Thumb SELECT_CC DAG
+// operation. Expanded by the scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler.
+ def tMOVCCr :
+ PseudoInst<(ops GPR:$dst, GPR:$false, GPR:$true, CCOp:$cc),
+ "@ tMOVCCr $cc",
+ [(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc))]>;
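+// A rough sketch of the expansion (assuming $cc holds "eq"): branch around a
+// copy of $true when the condition fails, so $dst keeps the $false value on
+// that path:
+//   bne skip
+//   cpy $dst, $true
+// skip: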
+
+// tLEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def tLEApcrel : TI<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def tLEApcrelCall : TI<(ops GPR:$dst, i32imm:$label),
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label:call}-(",
+ "${:private}PCRELL${:uid}+4))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// ConstantPool, GlobalAddress
+def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
+def : ThumbPat<(ARMWrapperCall tglobaladdr :$dst),
+ (tLEApcrelCall tglobaladdr :$dst)>;
+def : ThumbPat<(ARMWrapperCall texternalsym:$dst),
+ (tLEApcrelCall texternalsym:$dst)>;
+
+// Direct calls
+def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+
+// Indirect calls to ARM routines
+def : ThumbV5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>;
+
+// zextload i1 -> zextload i8
+def : ThumbPat<(zextloadi1 t_addrmode_ri5_1:$addr),
+ (tLDRBri t_addrmode_ri5_1:$addr)>;
+def : ThumbPat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBrr t_addrmode_rr:$addr)>;
+
+// truncstore i1 -> truncstore i8
+def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_ri5_1:$dst),
+ (tSTRBri GPR:$src, t_addrmode_ri5_1:$dst)>;
+def : ThumbPat<(truncstorei1 GPR:$src, t_addrmode_rr:$dst),
+ (tSTRBrr GPR:$src, t_addrmode_rr:$dst)>;
+
+// Large immediate handling.
+
+// Two piece imms.
+def : ThumbPat<(i32 thumb_immshifted:$src),
+ (tLSLri (tMOVri8 (thumb_immshifted_val imm:$src)),
+ (thumb_immshifted_shamt imm:$src))>;
+
+def : ThumbPat<(i32 imm0_255_comp:$src),
+ (tMVN (tMOVri8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
new file mode 100644
index 0000000000..cac8e4465c
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -0,0 +1,359 @@
+//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the ARM VFP instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// ARM Float Instruction
+class ASI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ASI5<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+// ARM Double Instruction
+class ADI<dag ops, string asm, list<dag> pattern> : AI<ops, asm, pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+class ADI5<dag ops, string asm, list<dag> pattern>
+ : I<ops, AddrMode5, Size4Bytes, IndexModeNone, asm, "", pattern> {
+ // TODO: Mark the instructions with the appropriate subtarget info.
+}
+
+def SDT_FTOI :
+SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 :
+SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_FMDRR :
+SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTRet, [SDNPInFlag,SDNPOutFlag]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
+def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+
+//===----------------------------------------------------------------------===//
+// Load / store Instructions.
+//
+
+let isLoad = 1 in {
+def FLDD : ADI5<(ops DPR:$dst, addrmode5:$addr),
+ "fldd $dst, $addr",
+ [(set DPR:$dst, (load addrmode5:$addr))]>;
+
+def FLDS : ASI5<(ops SPR:$dst, addrmode5:$addr),
+ "flds $dst, $addr",
+ [(set SPR:$dst, (load addrmode5:$addr))]>;
+} // isLoad
+
+let isStore = 1 in {
+def FSTD : ADI5<(ops DPR:$src, addrmode5:$addr),
+ "fstd $src, $addr",
+ [(store DPR:$src, addrmode5:$addr)]>;
+
+def FSTS : ASI5<(ops SPR:$src, addrmode5:$addr),
+ "fsts $src, $addr",
+ [(store SPR:$src, addrmode5:$addr)]>;
+} // isStore
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+let isLoad = 1 in {
+def FLDMD : ADI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
+ "fldm${addr:submode}d ${addr:base}, $dst1",
+ []>;
+
+def FLDMS : ASI5<(ops addrmode5:$addr, reglist:$dst1, variable_ops),
+ "fldm${addr:submode}s ${addr:base}, $dst1",
+ []>;
+} // isLoad
+
+let isStore = 1 in {
+def FSTMD : ADI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
+ "fstm${addr:submode}d ${addr:base}, $src1",
+ []>;
+
+def FSTMS : ASI5<(ops addrmode5:$addr, reglist:$src1, variable_ops),
+ "fstm${addr:submode}s ${addr:base}, $src1",
+ []>;
+} // isStore
+
+// FLDMX, FSTMX - mixing S/D registers for pre-ARMv6 cores
+
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
+
+def FADDD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "faddd $dst, $a, $b",
+ [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
+
+def FADDS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fadds $dst, $a, $b",
+ [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
+
+def FCMPED : ADI<(ops DPR:$a, DPR:$b),
+ "fcmped $a, $b",
+ [(arm_cmpfp DPR:$a, DPR:$b)]>;
+
+def FCMPES : ASI<(ops SPR:$a, SPR:$b),
+ "fcmpes $a, $b",
+ [(arm_cmpfp SPR:$a, SPR:$b)]>;
+
+def FDIVD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fdivd $dst, $a, $b",
+ [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
+
+def FDIVS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fdivs $dst, $a, $b",
+ [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
+
+def FMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fmuld $dst, $a, $b",
+ [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
+
+def FMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fmuls $dst, $a, $b",
+ [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
+
+
+def FNMULD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fnmuld $dst, $a, $b",
+ [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]>;
+
+def FNMULS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fnmuls $dst, $a, $b",
+ [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+
+def FSUBD : ADI<(ops DPR:$dst, DPR:$a, DPR:$b),
+ "fsubd $dst, $a, $b",
+ [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]>;
+
+def FSUBS : ASI<(ops SPR:$dst, SPR:$a, SPR:$b),
+ "fsubs $dst, $a, $b",
+ [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+
+//===----------------------------------------------------------------------===//
+// FP Unary Operations.
+//
+
+def FABSD : ADI<(ops DPR:$dst, DPR:$a),
+ "fabsd $dst, $a",
+ [(set DPR:$dst, (fabs DPR:$a))]>;
+
+def FABSS : ASI<(ops SPR:$dst, SPR:$a),
+ "fabss $dst, $a",
+ [(set SPR:$dst, (fabs SPR:$a))]>;
+
+def FCMPEZD : ADI<(ops DPR:$a),
+ "fcmpezd $a",
+ [(arm_cmpfp0 DPR:$a)]>;
+
+def FCMPEZS : ASI<(ops SPR:$a),
+ "fcmpezs $a",
+ [(arm_cmpfp0 SPR:$a)]>;
+
+def FCVTDS : ADI<(ops DPR:$dst, SPR:$a),
+ "fcvtds $dst, $a",
+ [(set DPR:$dst, (fextend SPR:$a))]>;
+
+def FCVTSD : ADI<(ops SPR:$dst, DPR:$a),
+ "fcvtsd $dst, $a",
+ [(set SPR:$dst, (fround DPR:$a))]>;
+
+def FCPYD : ADI<(ops DPR:$dst, DPR:$a),
+ "fcpyd $dst, $a",
+ [/*(set DPR:$dst, DPR:$a)*/]>;
+
+def FCPYS : ASI<(ops SPR:$dst, SPR:$a),
+ "fcpys $dst, $a",
+ [/*(set SPR:$dst, SPR:$a)*/]>;
+
+def FNEGD : ADI<(ops DPR:$dst, DPR:$a),
+ "fnegd $dst, $a",
+ [(set DPR:$dst, (fneg DPR:$a))]>;
+
+def FNEGS : ASI<(ops SPR:$dst, SPR:$a),
+ "fnegs $dst, $a",
+ [(set SPR:$dst, (fneg SPR:$a))]>;
+
+def FSQRTD : ADI<(ops DPR:$dst, DPR:$a),
+ "fsqrtd $dst, $a",
+ [(set DPR:$dst, (fsqrt DPR:$a))]>;
+
+def FSQRTS : ASI<(ops SPR:$dst, SPR:$a),
+ "fsqrts $dst, $a",
+ [(set SPR:$dst, (fsqrt SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP <-> GPR Copies. Int <-> FP Conversions.
+//
+
+def IMPLICIT_DEF_SPR : PseudoInst<(ops SPR:$rD),
+ "@ IMPLICIT_DEF_SPR $rD",
+ [(set SPR:$rD, (undef))]>;
+def IMPLICIT_DEF_DPR : PseudoInst<(ops DPR:$rD),
+ "@ IMPLICIT_DEF_DPR $rD",
+ [(set DPR:$rD, (undef))]>;
+
+def FMRS : ASI<(ops GPR:$dst, SPR:$src),
+ "fmrs $dst, $src",
+ [(set GPR:$dst, (bitconvert SPR:$src))]>;
+
+def FMSR : ASI<(ops SPR:$dst, GPR:$src),
+ "fmsr $dst, $src",
+ [(set SPR:$dst, (bitconvert GPR:$src))]>;
+
+
+def FMRRD : ADI<(ops GPR:$dst1, GPR:$dst2, DPR:$src),
+ "fmrrd $dst1, $dst2, $src",
+ [/* FIXME: Can't write a pattern for a multiple-result instr */]>;
+
+// FMDHR: GPR -> SPR
+// FMDLR: GPR -> SPR
+
+def FMDRR : ADI<(ops DPR:$dst, GPR:$src1, GPR:$src2),
+ "fmdrr $dst, $src1, $src2",
+ [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
+
+// FMRDH: SPR -> GPR
+// FMRDL: SPR -> GPR
+// FMRRS: SPR -> GPR
+// FMRX : SPR system reg -> GPR
+
+// FMSRR: GPR -> SPR
+
+
+def FMSTAT : ASI<(ops), "fmstat", [(arm_fmstat)]>;
+
+// FMXR: GPR -> VFP system reg
+
+
+// Int to FP:
+
+def FSITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fsitod $dst, $a",
+ [(set DPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FSITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fsitos $dst, $a",
+ [(set SPR:$dst, (arm_sitof SPR:$a))]>;
+
+def FUITOD : ADI<(ops DPR:$dst, SPR:$a),
+ "fuitod $dst, $a",
+ [(set DPR:$dst, (arm_uitof SPR:$a))]>;
+
+def FUITOS : ASI<(ops SPR:$dst, SPR:$a),
+ "fuitos $dst, $a",
+ [(set SPR:$dst, (arm_uitof SPR:$a))]>;
+
+// FP to Int:
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
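+// For instance (illustrative): with round-towards-zero,
+//   ftosizs s0, s1   @ 1.9f -> 1, -1.9f -> -1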
+
+def FTOSIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftosizd $dst, $a",
+ [(set SPR:$dst, (arm_ftosi DPR:$a))]>;
+
+def FTOSIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftosizs $dst, $a",
+ [(set SPR:$dst, (arm_ftosi SPR:$a))]>;
+
+def FTOUIZD : ADI<(ops SPR:$dst, DPR:$a),
+ "ftouizd $dst, $a",
+ [(set SPR:$dst, (arm_ftoui DPR:$a))]>;
+
+def FTOUIZS : ASI<(ops SPR:$dst, SPR:$a),
+ "ftouizs $dst, $a",
+ [(set SPR:$dst, (arm_ftoui SPR:$a))]>;
+
+//===----------------------------------------------------------------------===//
+// FP FMA Operations.
+//
+
+def FMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fmacd $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fmacs $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fmscd $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fmscs $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmacd $dst, $a, $b",
+ [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMACS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmacs $dst, $a, $b",
+ [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMSCD : ADI<(ops DPR:$dst, DPR:$dstin, DPR:$a, DPR:$b),
+ "fnmscd $dst, $a, $b",
+ [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+def FNMSCS : ASI<(ops SPR:$dst, SPR:$dstin, SPR:$a, SPR:$b),
+ "fnmscs $dst, $a, $b",
+ [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
+ RegConstraint<"$dstin = $dst">;
+
+//===----------------------------------------------------------------------===//
+// FP Conditional moves.
+//
+
+def FCPYDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
+ "fcpyd$cc $dst, $true",
+ [(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
+ "fcpys$cc $dst, $true",
+ [(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADI<(ops DPR:$dst, DPR:$false, DPR:$true, CCOp:$cc),
+ "fnegd$cc $dst, $true",
+ [(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASI<(ops SPR:$dst, SPR:$false, SPR:$true, CCOp:$cc),
+ "fnegs$cc $dst, $true",
+ [(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))]>,
+ RegConstraint<"$false = $dst">;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
new file mode 100644
index 0000000000..704898e8aa
--- /dev/null
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -0,0 +1,628 @@
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs load / store related peephole
+// optimizations. This pass should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-ldst-opt"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+STATISTIC(NumLDMGened , "Number of ldm instructions generated");
+STATISTIC(NumSTMGened , "Number of stm instructions generated");
+STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
+STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM load / store optimization pass";
+ }
+
+ private:
+ struct MemOpQueueEntry {
+ int Offset;
+ unsigned Position;
+ MachineBasicBlock::iterator MBBI;
+ bool Merged;
+ MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
+      : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ };
+ typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
+ typedef MemOpQueue::iterator MemOpQueueIter;
+
+ SmallVector<MachineBasicBlock::iterator, 4>
+ MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+ int Opcode, unsigned Size, MemOpQueue &MemOps);
+
+ bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
+ bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ };
+}
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
+ return new ARMLoadStoreOpt();
+}
+
+static int getLoadStoreMultipleOpcode(int Opcode) {
+ switch (Opcode) {
+ case ARM::LDR:
+ NumLDMGened++;
+ return ARM::LDM;
+ case ARM::STR:
+ NumSTMGened++;
+ return ARM::STM;
+ case ARM::FLDS:
+ NumFLDMGened++;
+ return ARM::FLDMS;
+ case ARM::FSTS:
+ NumFSTMGened++;
+ return ARM::FSTMS;
+ case ARM::FLDD:
+ NumFLDMGened++;
+ return ARM::FLDMD;
+ case ARM::FSTD:
+ NumFSTMGened++;
+ return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeOps - Create and insert an LDM or STM with Base as base register and
+/// registers in Regs as the register operands that would be loaded / stored.
+/// It returns true if the transformation is done.
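+///
+/// For example:
+///   str r0, [r5]
+///   str r1, [r5, #4]
+///   str r2, [r5, #8]
+/// can be emitted as a single
+///   stmia r5, {r0, r1, r2}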
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ int Offset, unsigned Base, int Opcode,
+ SmallVector<unsigned, 8> &Regs,
+ const TargetInstrInfo *TII) {
+ // Only a single register to load / store. Don't bother.
+ unsigned NumRegs = Regs.size();
+ if (NumRegs <= 1)
+ return false;
+
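+  // For LDM / STM (addressing mode 4), the starting offset picks the
+  // sub-mode: 0 -> ia, 4 -> ib, -4*(NumRegs-1) -> da, -4*NumRegs -> db.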
+ ARM_AM::AMSubMode Mode = ARM_AM::ia;
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if (isAM4 && Offset == 4)
+ Mode = ARM_AM::ib;
+ else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
+ Mode = ARM_AM::da;
+ else if (isAM4 && Offset == -4 * (int)NumRegs)
+ Mode = ARM_AM::db;
+ else if (Offset != 0) {
+ // If starting offset isn't zero, insert a MI to materialize a new base.
+ // But only do so if it is cost effective, i.e. merging more than two
+ // loads / stores.
+ if (NumRegs <= 2)
+ return false;
+
+ unsigned NewBase;
+ if (Opcode == ARM::LDR)
+      // If it is a load, then just use one of the destination registers
+      // as the new base.
+ NewBase = Regs[NumRegs-1];
+ else {
+ // FIXME: Try scavenging a register to use as a new base.
+ NewBase = ARM::R12;
+ }
+ int BaseOpc = ARM::ADDri;
+ if (Offset < 0) {
+ BaseOpc = ARM::SUBri;
+ Offset = - Offset;
+ }
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset == -1)
+ return false; // Probably not worth it then.
+ BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase).addReg(Base).addImm(ImmedOffset);
+ Base = NewBase;
+ }
+
+ bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
+ bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ Opcode = getLoadStoreMultipleOpcode(Opcode);
+ MachineInstrBuilder MIB = (isAM4)
+ ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base)
+ .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs));
+ for (unsigned i = 0; i != NumRegs; ++i)
+    MIB = MIB.addReg(Regs[i], isDef);
+
+ return true;
+}
+
+SmallVector<MachineBasicBlock::iterator, 4>
+ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB,
+ unsigned SIndex, unsigned Base, int Opcode,
+ unsigned Size, MemOpQueue &MemOps) {
+ bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ SmallVector<MachineBasicBlock::iterator, 4> Merges;
+ int Offset = MemOps[SIndex].Offset;
+ int SOffset = Offset;
+ unsigned Pos = MemOps[SIndex].Position;
+ MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
+ SmallVector<unsigned, 8> Regs;
+ unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+ unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
+ Regs.push_back(PReg);
+ for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
+ int NewOffset = MemOps[i].Offset;
+ unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
+ unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
+ // AM4 - register numbers in ascending order.
+ // AM5 - consecutive register numbers in ascending order.
+ if (NewOffset == Offset + (int)Size &&
+ ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ Offset += Size;
+ Regs.push_back(Reg);
+ PRegNum = RegNum;
+ } else {
+      // Can't merge this in. Try merging the earlier ones first.
+ if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned j = SIndex; j < i; ++j) {
+ MBB.erase(MemOps[j].MBBI);
+ MemOps[j].Merged = true;
+ }
+ }
+ SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
+ MergeLDR_STR(MBB, i, Base, Opcode, Size, MemOps);
+ Merges.append(Merges2.begin(), Merges2.end());
+ return Merges;
+ }
+
+ if (MemOps[i].Position > Pos) {
+ Pos = MemOps[i].Position;
+ Loc = MemOps[i].MBBI;
+ }
+ }
+
+ if (mergeOps(MBB, ++Loc, SOffset, Base, Opcode, Regs, TII)) {
+ Merges.push_back(prior(Loc));
+ for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
+ MBB.erase(MemOps[i].MBBI);
+ MemOps[i].Merged = true;
+ }
+ }
+
+ return Merges;
+}
+
+static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes) {
+ return (MI && MI->getOpcode() == ARM::SUBri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
+}
+
+static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes) {
+ return (MI && MI->getOpcode() == ARM::ADDri &&
+ MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes);
+}
+
+static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return 0;
+ case ARM::LDR:
+ case ARM::STR:
+ case ARM::FLDS:
+ case ARM::FSTS:
+ return 4;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ return 8;
+ case ARM::LDM:
+ case ARM::STM:
+ return (MI->getNumOperands() - 2) * 4;
+ case ARM::FLDMS:
+ case ARM::FSTMS:
+ case ARM::FLDMD:
+ case ARM::FSTMD:
+ return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ }
+}
+
+/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
+/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+///
+/// stmia rn, <ra, rb, rc>
+/// rn := rn + 4 * 3;
+/// =>
+/// stmia rn!, <ra, rb, rc>
+///
+/// rn := rn - 4 * 3;
+/// ldmia rn, <ra, rb, rc>
+/// =>
+/// ldmdb rn!, <ra, rb, rc>
+static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;
+
+ if (isAM4) {
+ if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ // Can't use the updating AM4 sub-mode if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
+ }
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
+ MBB.erase(NextMBBI);
+ return true;
+ }
+ }
+ } else {
+ // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
+ return false;
+
+ ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+ unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
+ MBB.erase(PrevMBBI);
+ return true;
+ }
+ }
+
+ if (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (Mode == ARM_AM::ia &&
+ isMatchingIncrement(NextMBBI, Base, Bytes)) {
+ MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
+ MBB.erase(NextMBBI);
+        return true;
+      }
+    }
+ }
+
+ return false;
+}
+
+static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::STR: return ARM::STR_PRE;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
+ switch (Opc) {
+ case ARM::LDR: return ARM::LDR_POST;
+ case ARM::STR: return ARM::STR_POST;
+ case ARM::FLDS: return ARM::FLDMS;
+ case ARM::FLDD: return ARM::FLDMD;
+ case ARM::FSTS: return ARM::FSTMS;
+ case ARM::FSTD: return ARM::FSTMD;
+ default: abort();
+ }
+ return 0;
+}
+
+/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
+/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
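+///
+/// ldr rd, [rn]
+/// rn := rn + 4
+/// =>
+/// ldr rd, [rn], #4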
+static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const TargetInstrInfo *TII) {
+ MachineInstr *MI = MBBI;
+ unsigned Base = MI->getOperand(1).getReg();
+ unsigned Bytes = getLSMultipleTransferSize(MI);
+ int Opcode = MI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
+ (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
+ return false;
+
+ bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ // Can't do the merge if the destination register is the same as the would-be
+ // writeback register.
+ if (isLd && MI->getOperand(0).getReg() == Base)
+ return false;
+
+ bool DoMerge = false;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ unsigned NewOpc = 0;
+ if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ if (isMatchingDecrement(PrevMBBI, Base, Bytes)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) {
+ DoMerge = true;
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(PrevMBBI);
+ }
+
+ if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) {
+ DoMerge = true;
+ AddSub = ARM_AM::sub;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ } else if (isMatchingIncrement(NextMBBI, Base, Bytes)) {
+ DoMerge = true;
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
+ }
+ if (DoMerge)
+ MBB.erase(NextMBBI);
+ }
+
+ if (!DoMerge)
+ return false;
+
+ bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
+ : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
+ true, isDPR ? 2 : 1);
+ if (isLd) {
+ if (isAM2)
+ BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
+ .addReg(Base, true).addReg(Base).addReg(0).addImm(Offset);
+ else
+ BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
+ .addImm(Offset).addReg(MI->getOperand(0).getReg(), true);
+ } else {
+ if (isAM2)
+ BuildMI(MBB, MBBI, TII->get(NewOpc), Base).addReg(MI->getOperand(0).getReg())
+ .addReg(Base).addReg(0).addImm(Offset);
+ else
+ BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base)
+ .addImm(Offset).addReg(MI->getOperand(0).getReg(), false);
+ }
+ MBB.erase(MBBI);
+
+ return true;
+}
+
+/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
+/// ops of the same base and incrementing offset into LDM / STM ops.
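+///
+/// e.g. ldr r0, [r5]
+///      ldr r1, [r5, #4]
+///      ldr r2, [r5, #8]
+/// =>   ldmia r5, {r0, r1, r2}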
+bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
+ unsigned NumMerges = 0;
+ unsigned NumMemOps = 0;
+ MemOpQueue MemOps;
+ unsigned CurrBase = 0;
+ int CurrOpc = -1;
+ unsigned CurrSize = 0;
+ unsigned Position = 0;
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ bool Advance = false;
+ bool TryMerge = false;
+ bool Clobber = false;
+
+ int Opcode = MBBI->getOpcode();
+ bool isMemOp = false;
+ bool isAM2 = false;
+ unsigned Size = 4;
+ switch (Opcode) {
+ case ARM::LDR:
+ case ARM::STR:
+ isMemOp =
+ (MBBI->getOperand(1).isRegister() && MBBI->getOperand(2).getReg() == 0);
+ isAM2 = true;
+ break;
+ case ARM::FLDS:
+ case ARM::FSTS:
+ isMemOp = MBBI->getOperand(1).isRegister();
+ break;
+ case ARM::FLDD:
+ case ARM::FSTD:
+ isMemOp = MBBI->getOperand(1).isRegister();
+ Size = 8;
+ break;
+ }
+ if (isMemOp) {
+ unsigned Base = MBBI->getOperand(1).getReg();
+ unsigned OffIdx = MBBI->getNumOperands()-1;
+ unsigned OffField = MBBI->getOperand(OffIdx).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ // Watch out for:
+ // r4 := ldr [r5]
+ // r5 := ldr [r5, #4]
+ // r6 := ldr [r5, #8]
+ //
+ // The second ldr has effectively broken the chain even though it
+ // looks like the later ldr(s) use the same base register. Try to
+ // merge the ldr's so far, including this one. But don't try to
+ // combine the following ldr(s).
+ Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
+ if (CurrBase == 0 && !Clobber) {
+ // Start of a new chain.
+ CurrBase = Base;
+ CurrOpc = Opcode;
+ CurrSize = Size;
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ if (Clobber) {
+ TryMerge = true;
+ Advance = true;
+ }
+
+ if (CurrOpc == Opcode && CurrBase == Base) {
+ // Continue adding to the queue.
+ if (Offset > MemOps.back().Offset) {
+ MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ } else {
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+ NumMemOps++;
+ Advance = true;
+ break;
+ } else if (Offset == I->Offset) {
+ // Collision! This can't be merged!
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (Advance) {
+ ++Position;
+ ++MBBI;
+ } else
+ TryMerge = true;
+
+ if (TryMerge) {
+ if (NumMemOps > 1) {
+ SmallVector<MachineBasicBlock::iterator,4> MBBII =
+ MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,MemOps);
+        // Try folding preceding/trailing base inc/dec into the generated
+ // LDM/STM ops.
+ for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
+ if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
+ NumMerges++;
+ NumMerges += MBBII.size();
+ }
+
+      // Try folding preceding/trailing base inc/dec into those load / store
+      // ops that were not merged to form LDM / STM ops.
+ for (unsigned i = 0; i != NumMemOps; ++i)
+ if (!MemOps[i].Merged)
+ if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
+ NumMerges++;
+
+ CurrBase = 0;
+ CurrOpc = -1;
+ if (NumMemOps) {
+ MemOps.clear();
+ NumMemOps = 0;
+ }
+
+ // If iterator hasn't been advanced and this is not a memory op, skip it.
+ // It can't start a new chain anyway.
+ if (!Advance && !isMemOp && MBBI != E) {
+ ++Position;
+ ++MBBI;
+ }
+ }
+ }
+ return NumMerges > 0;
+}
+
+/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
+/// (bx lr) into the preceding stack restore so it directly restores the value
+/// of LR into pc.
+/// ldmfd sp!, {r7, lr}
+/// bx lr
+/// =>
+/// ldmfd sp!, {r7, pc}
+bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ if (MBB.empty()) return false;
+
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+ MachineInstr *PrevMI = prior(MBBI);
+ if (PrevMI->getOpcode() == ARM::LDM) {
+ MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
+ if (MO.getReg() == ARM::LR) {
+ PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
+ MO.setReg(ARM::PC);
+ MBB.erase(MBBI);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ Modified |= LoadStoreMultipleOpti(MBB);
+ Modified |= MergeReturnIntoLDM(MBB);
+ }
+ return Modified;
+}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
new file mode 100644
index 0000000000..4de3dbc410
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -0,0 +1,136 @@
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ARM-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMACHINEFUNCTIONINFO_H
+#define ARMMACHINEFUNCTIONINFO_H
+
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private ARM target-specific information for each MachineFunction.
+class ARMFunctionInfo : public MachineFunctionInfo {
+
+ /// isThumb - True if this function is compiled under Thumb mode.
+ ///
+ bool isThumb;
+
+ /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
+ ///
+ unsigned VarArgsRegSaveSize;
+
+ /// FramePtrSpilled - True if FP register is spilled. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool FramePtrSpilled;
+
+ /// FramePtrSpillOffset - If FramePtrSpilled, this records the frame pointer
+ /// spill stack offset.
+ unsigned FramePtrSpillOffset;
+
+ /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
+ /// register spills areas. For Mac OS X:
+ ///
+ /// GPR callee-saved (1) : r4, r5, r6, r7, lr
+ /// --------------------------------------------
+ /// GPR callee-saved (2) : r8, r10, r11
+ /// --------------------------------------------
+ /// DPR callee-saved : d8 - d15
+ unsigned GPRCS1Offset;
+ unsigned GPRCS2Offset;
+ unsigned DPRCSOffset;
+
+ /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
+ /// areas.
+ unsigned GPRCS1Size;
+ unsigned GPRCS2Size;
+ unsigned DPRCSSize;
+
+ /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
+ /// which belong to these spill areas.
+ std::set<int> GPRCS1Frames;
+ std::set<int> GPRCS2Frames;
+ std::set<int> DPRCSFrames;
+
+ /// JumpTableUId - Unique id for jumptables.
+ ///
+ unsigned JumpTableUId;
+
+public:
+ ARMFunctionInfo() :
+ isThumb(false),
+ VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
+ GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
+
+ ARMFunctionInfo(MachineFunction &MF) :
+ isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ VarArgsRegSaveSize(0), FramePtrSpilled(false), FramePtrSpillOffset(0),
+ GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0) {}
+
+ bool isThumbFunction() const { return isThumb; }
+
+ unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
+ void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+
+ bool isFramePtrSpilled() const { return FramePtrSpilled; }
+ void setFramePtrSpilled(bool s) { FramePtrSpilled = s; }
+ unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
+ void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
+ unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
+
+ void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
+ void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+
+ unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
+ unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+
+ void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
+ void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
+
+ bool isGPRCalleeSavedArea1Frame(unsigned fi) const {
+ return GPRCS1Frames.count(fi);
+ }
+ bool isGPRCalleeSavedArea2Frame(unsigned fi) const {
+ return GPRCS2Frames.count(fi);
+ }
+ bool isDPRCalleeSavedAreaFrame(unsigned fi) const {
+ return DPRCSFrames.count(fi);
+ }
+
+ void addGPRCalleeSavedArea1Frame(unsigned fi) {
+ GPRCS1Frames.insert(fi);
+ }
+ void addGPRCalleeSavedArea2Frame(unsigned fi) {
+ GPRCS2Frames.insert(fi);
+ }
+ void addDPRCalleeSavedAreaFrame(unsigned fi) {
+ DPRCSFrames.insert(fi);
+ }
+
+ unsigned createJumpTableUId() {
+ return JumpTableUId++;
+ }
+};
+} // End llvm namespace
+
+#endif // ARMMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM/ARMMul.cpp b/lib/Target/ARM/ARMMul.cpp
deleted file mode 100644
index c4eeaac479..0000000000
--- a/lib/Target/ARM/ARMMul.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- ARMMul.cpp - Define TargetMachine for A5CRM -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file was developed by the "Instituto Nokia de Tecnologia" and
-// is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Modify the ARM multiplication instructions so that Rd{Hi,Lo} and Rm are distinct
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
- class VISIBILITY_HIDDEN FixMul : public MachineFunctionPass {
- virtual bool runOnMachineFunction(MachineFunction &MF);
- };
-}
-
-FunctionPass *llvm::createARMFixMulPass() { return new FixMul(); }
-
-bool FixMul::runOnMachineFunction(MachineFunction &MF) {
- bool Changed = false;
-
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
- BB != E; ++BB) {
- MachineBasicBlock &MBB = *BB;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr *MI = I;
-
- int Op = MI->getOpcode();
- if (Op == ARM::MUL ||
- Op == ARM::SMULL ||
- Op == ARM::UMULL) {
- MachineOperand &RdOp = MI->getOperand(0);
- MachineOperand &RmOp = MI->getOperand(1);
- MachineOperand &RsOp = MI->getOperand(2);
-
- unsigned Rd = RdOp.getReg();
- unsigned Rm = RmOp.getReg();
- unsigned Rs = RsOp.getReg();
-
- if (Rd == Rm) {
- Changed = true;
- if (Rd != Rs) {
- //Rd and Rm must be distinct, but Rd can be equal to Rs.
- //Swap Rs and Rm
- RmOp.setReg(Rs);
- RsOp.setReg(Rm);
- } else {
- unsigned scratch = Op == ARM::MUL ? ARM::R12 : ARM::R0;
- BuildMI(MBB, I, MF.getTarget().getInstrInfo()->get(ARM::MOV),
- scratch).addReg(Rm).addImm(0).addImm(ARMShift::LSL);
- RmOp.setReg(scratch);
- }
- }
- }
- }
- }
-
- return Changed;
-}
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 16a1a29660..5ae482babc 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -13,243 +13,1023 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
-#include "ARMCommon.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/Type.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <iostream>
using namespace llvm;
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool hasFP(const MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return NoFramePointerElim || MFI->hasVarSizedObjects();
-}
-
-static void splitInstructionWithImmediate(MachineBasicBlock &BB,
- MachineBasicBlock::iterator I,
- const TargetInstrDescriptor &TID,
- unsigned DestReg,
- unsigned OrigReg,
- unsigned immediate){
- std::vector<unsigned> immediatePieces = splitImmediate(immediate);
- std::vector<unsigned>::iterator it;
- for (it=immediatePieces.begin(); it != immediatePieces.end(); ++it){
- BuildMI(BB, I, TID, DestReg).addReg(OrigReg)
- .addImm(*it).addImm(0).addImm(ARMShift::LSL);
+unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace ARM;
+ switch (RegEnum) {
+ case R0: case S0: case D0: return 0;
+ case R1: case S1: case D1: return 1;
+ case R2: case S2: case D2: return 2;
+ case R3: case S3: case D3: return 3;
+ case R4: case S4: case D4: return 4;
+ case R5: case S5: case D5: return 5;
+ case R6: case S6: case D6: return 6;
+ case R7: case S7: case D7: return 7;
+ case R8: case S8: case D8: return 8;
+ case R9: case S9: case D9: return 9;
+ case R10: case S10: case D10: return 10;
+ case R11: case S11: case D11: return 11;
+ case R12: case S12: case D12: return 12;
+ case SP: case S13: case D13: return 13;
+ case LR: case S14: case D14: return 14;
+ case PC: case S15: case D15: return 15;
+ case S16: return 16;
+ case S17: return 17;
+ case S18: return 18;
+ case S19: return 19;
+ case S20: return 20;
+ case S21: return 21;
+ case S22: return 22;
+ case S23: return 23;
+ case S24: return 24;
+ case S25: return 25;
+ case S26: return 26;
+ case S27: return 27;
+ case S28: return 28;
+ case S29: return 29;
+ case S30: return 30;
+ case S31: return 31;
+ default:
+ std::cerr << "Unknown ARM register!\n";
+ abort();
}
}
-ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii)
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
- TII(tii) {
+ TII(tii), STI(sti),
+ FramePtr(STI.useThumbBacktraces() ? ARM::R7 : ARM::R11) {
+}
+
+bool ARMRegisterInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, TII.get(ARM::tPUSH));
+ for (unsigned i = CSI.size(); i != 0; --i)
+ MIB.addReg(CSI[i-1].getReg());
+ return true;
+}
+
+bool ARMRegisterInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->isThumbFunction() || CSI.empty())
+ return false;
+
+ MachineInstr *PopMI = new MachineInstr(TII.get(ARM::tPOP));
+ MBB.insert(MI, PopMI);
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (Reg == ARM::LR) {
+ Reg = ARM::PC;
+ PopMI->setInstrDescriptor(TII.get(ARM::tPOP_RET));
+ MBB.erase(MI);
+ }
+ PopMI->addRegOperand(Reg, true);
+ }
+ return true;
}
void ARMRegisterInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, int FI,
const TargetRegisterClass *RC) const {
- assert (RC == ARM::IntRegsRegisterClass);
- BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI).addImm(0);
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction())
+ BuildMI(MBB, I, TII.get(ARM::tSTRspi)).addReg(SrcReg)
+ .addFrameIndex(FI).addImm(0);
+ else
+ BuildMI(MBB, I, TII.get(ARM::STR)).addReg(SrcReg)
+ .addFrameIndex(FI).addReg(0).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ BuildMI(MBB, I, TII.get(ARM::FSTD)).addReg(SrcReg)
+ .addFrameIndex(FI).addImm(0);
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ BuildMI(MBB, I, TII.get(ARM::FSTS)).addReg(SrcReg)
+ .addFrameIndex(FI).addImm(0);
+ }
}
void ARMRegisterInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC) const {
- assert (RC == ARM::IntRegsRegisterClass);
- BuildMI(MBB, I, TII.get(ARM::LDR), DestReg).addFrameIndex(FI).addImm(0);
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumbFunction())
+ BuildMI(MBB, I, TII.get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ else
+ BuildMI(MBB, I, TII.get(ARM::LDR), DestReg)
+ .addFrameIndex(FI).addReg(0).addImm(0);
+ } else if (RC == ARM::DPRRegisterClass) {
+ BuildMI(MBB, I, TII.get(ARM::FLDD), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ } else {
+ assert(RC == ARM::SPRRegisterClass && "Unknown regclass!");
+ BuildMI(MBB, I, TII.get(ARM::FLDS), DestReg)
+ .addFrameIndex(FI).addImm(0);
+ }
}
void ARMRegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *RC) const {
- assert(RC == ARM::IntRegsRegisterClass ||
- RC == ARM::FPRegsRegisterClass ||
- RC == ARM::DFPRegsRegisterClass);
-
- if (RC == ARM::IntRegsRegisterClass)
- BuildMI(MBB, I, TII.get(ARM::MOV), DestReg).addReg(SrcReg).addImm(0)
- .addImm(ARMShift::LSL);
- else if (RC == ARM::FPRegsRegisterClass)
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *RC) const {
+ if (RC == ARM::GPRRegisterClass) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ BuildMI(MBB, I, TII.get(AFI->isThumbFunction() ? ARM::tMOVrr : ARM::MOVrr),
+ DestReg).addReg(SrcReg);
+ } else if (RC == ARM::SPRRegisterClass)
BuildMI(MBB, I, TII.get(ARM::FCPYS), DestReg).addReg(SrcReg);
- else
+ else if (RC == ARM::DPRRegisterClass)
BuildMI(MBB, I, TII.get(ARM::FCPYD), DestReg).addReg(SrcReg);
+ else
+ abort();
}
-MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr* MI,
- unsigned OpNum,
- int FI) const {
- return NULL;
+MachineInstr *ARMRegisterInfo::foldMemoryOperand(MachineInstr *MI,
+ unsigned OpNum, int FI) const {
+ unsigned Opc = MI->getOpcode();
+ MachineInstr *NewMI = NULL;
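+  // A register-to-register copy can be folded into a frame access: if the
+  // copy's destination is being spilled to FI, rewrite it as a store of the
+  // source to FI; if its source is being reloaded, rewrite it as a load of
+  // the destination from FI.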
+ switch (Opc) {
+ default: break;
+ case ARM::MOVrr: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::STR)).addReg(SrcReg).addFrameIndex(FI)
+ .addReg(0).addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::LDR), DstReg).addFrameIndex(FI).addReg(0)
+ .addImm(0);
+ }
+ break;
+ }
+ case ARM::tMOVrr: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::tSTRspi)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::tLDRspi), DstReg).addFrameIndex(FI)
+ .addImm(0);
+ }
+ break;
+ }
+ case ARM::FCPYS: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::FSTS)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::FLDS), DstReg).addFrameIndex(FI).addImm(0);
+ }
+ break;
+ }
+ case ARM::FCPYD: {
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ NewMI = BuildMI(TII.get(ARM::FSTD)).addReg(SrcReg).addFrameIndex(FI)
+ .addImm(0);
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ NewMI = BuildMI(TII.get(ARM::FLDD), DstReg).addFrameIndex(FI).addImm(0);
+ }
+ break;
+ }
+ }
+
+ if (NewMI)
+ NewMI->copyKillDeadInfo(MI);
+ return NewMI;
}
const unsigned* ARMRegisterInfo::getCalleeSavedRegs() const {
static const unsigned CalleeSavedRegs[] = {
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11,
- ARM::R14, 0
+ ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+ ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
};
- return CalleeSavedRegs;
+
+ static const unsigned DarwinCalleeSavedRegs[] = {
+ ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
+ ARM::R11, ARM::R10, ARM::R9, ARM::R8,
+
+ ARM::D15, ARM::D14, ARM::D13, ARM::D12,
+ ARM::D11, ARM::D10, ARM::D9, ARM::D8,
+ 0
+ };
+ return STI.isDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
}
const TargetRegisterClass* const *
ARMRegisterInfo::getCalleeSavedRegClasses() const {
static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass,
- &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass, &ARM::IntRegsRegClass,
- &ARM::IntRegsRegClass, 0
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+ &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
+
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
+ 0
};
return CalleeSavedRegClasses;
}
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+static bool hasFP(const MachineFunction &MF) {
+ return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitARMRegPlusImmediate - Emit a series of instructions to materialize
+/// a destreg = basereg + immediate in ARM code.
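+///
+/// The immediate is split into chunks that are each encodable as an ARM
+/// so_imm (an 8-bit value rotated right by an even amount); e.g.
+/// rd = sp + 0x10004 does not fit a single so_imm, so it is emitted as two
+/// adds, one adding #4 and one adding #0x10000.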
+static
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ if (isSub) NumBytes = -NumBytes;
+
+ while (NumBytes) {
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
+ unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ assert(ThisVal && "Didn't extract field correctly");
+
+ // We will handle these bits from offset, clear them.
+ NumBytes &= ~ThisVal;
+
+ // Get the properly encoded SOImmVal field.
+ int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
+ assert(SOImmVal != -1 && "Bit extraction didn't work?");
+
+ // Build the new ADD / SUB.
+ BuildMI(MBB, MBBI, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
+ .addReg(BaseReg).addImm(SOImmVal);
+ BaseReg = DestReg;
+ }
+}
+
+/// isLowRegister - Returns true if the register is low register r0-r7.
+///
+static bool isLowRegister(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ case R0: case R1: case R2: case R3:
+ case R4: case R5: case R6: case R7:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// emitThumbRegPlusImmediate - Emit a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
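+///
+/// Thumb add / sub immediates are only 3, 7 or 8 bits wide depending on the
+/// opcode, so large offsets are emitted as chains; e.g. sp -= 1024 becomes
+/// two "sub sp, #508" instructions followed by "sub sp, #8" (tSUBspi holds a
+/// 7-bit word count, i.e. at most 508 bytes per instruction).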
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, unsigned BaseReg,
+ int NumBytes, const TargetInstrInfo &TII) {
+ bool isSub = NumBytes < 0;
+ unsigned Bytes = (unsigned)NumBytes;
+ if (isSub) Bytes = -NumBytes;
+ bool isMul4 = (Bytes & 3) == 0;
+ bool isTwoAddr = false;
+ unsigned NumBits = 1;
+ unsigned Opc = 0;
+ unsigned ExtraOpc = 0;
+
+ if (DestReg == BaseReg && BaseReg == ARM::SP) {
+ assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+ Bytes >>= 2; // Implicitly multiplied by 4.
+ NumBits = 7;
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ isTwoAddr = true;
+ } else if (!isSub && BaseReg == ARM::SP) {
+ if (!isMul4) {
+ Bytes &= ~3;
+ ExtraOpc = ARM::tADDi3;
+ }
+ Bytes >>= 2; // Implicitly multiplied by 4.
+ NumBits = 8;
+ Opc = ARM::tADDrSPi;
+ } else {
+ if (DestReg != BaseReg) {
+ if (isLowRegister(DestReg) && isLowRegister(BaseReg)) {
+        // If both are low registers, emit DestReg = add BaseReg, min(Imm, 7)
+ unsigned Chunk = (1 << 3) - 1;
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ BuildMI(MBB, MBBI, TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+ .addReg(BaseReg).addImm(ThisVal);
+ } else {
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVrr), DestReg).addReg(BaseReg);
+ }
+ BaseReg = DestReg;
+ }
+ NumBits = 8;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+
+ unsigned Chunk = (1 << NumBits) - 1;
+ while (Bytes) {
+ unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+ Bytes -= ThisVal;
+ // Build the new tADD / tSUB.
+ if (isTwoAddr)
+ BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addImm(ThisVal);
+ else {
+ BuildMI(MBB, MBBI, TII.get(Opc), DestReg).addReg(BaseReg).addImm(ThisVal);
+ BaseReg = DestReg;
+
+ if (Opc == ARM::tADDrSPi) {
+ // r4 = add sp, imm
+ // r4 = add r4, imm
+ // ...
+ NumBits = 8;
+ Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+ isTwoAddr = true;
+ }
+ }
+ }
+
+ if (ExtraOpc)
+ BuildMI(MBB, MBBI, TII.get(ExtraOpc), DestReg).addReg(DestReg)
+ .addImm(((unsigned)NumBytes) & 3);
+}
+
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ int NumBytes, bool isThumb, const TargetInstrInfo &TII) {
+ if (isThumb)
+ emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+ else
+ emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII);
+}
+
void ARMRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (hasFP(MF)) {
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ // If we have alloca, convert as follows:
+ // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+ // ADJCALLSTACKUP -> add, sp, sp, amount
MachineInstr *Old = I;
unsigned Amount = Old->getOperand(0).getImmedValue();
if (Amount != 0) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ // We need to keep the stack aligned properly. To do this, we round the
+ // amount of space needed for the outgoing arguments up to the next
+ // alignment boundary.
unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
Amount = (Amount+Align-1)/Align*Align;
+ // Replace the pseudo instruction with a new instruction...
if (Old->getOpcode() == ARM::ADJCALLSTACKDOWN) {
- // sub sp, sp, amount
- splitInstructionWithImmediate(MBB, I, TII.get(ARM::SUB), ARM::R13,
- ARM::R13, Amount);
+ emitSPUpdate(MBB, I, -Amount, AFI->isThumbFunction(), TII);
} else {
- // add sp, sp, amount
assert(Old->getOpcode() == ARM::ADJCALLSTACKUP);
- splitInstructionWithImmediate(MBB, I, TII.get(ARM::ADD), ARM::R13,
- ARM::R13, Amount);
+ emitSPUpdate(MBB, I, Amount, AFI->isThumbFunction(), TII);
}
}
}
MBB.erase(I);
}
-void
-ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const {
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
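+///
+/// e.g. for Imm = -300 this emits:
+///   mov rd, #255
+///   add rd, #45
+///   neg rd, rd
+/// since tMOVri8 only holds an 8-bit immediate.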
+static void emitThumbConstant(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned DestReg, int Imm,
+ const TargetInstrInfo &TII) {
+ bool isSub = Imm < 0;
+ if (isSub) Imm = -Imm;
+
+ int Chunk = (1 << 8) - 1;
+ int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+ Imm -= ThisVal;
+ BuildMI(MBB, MBBI, TII.get(ARM::tMOVri8), DestReg).addImm(ThisVal);
+ if (Imm > 0)
+ emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII);
+ if (isSub)
+ BuildMI(MBB, MBBI, TII.get(ARM::tNEG), DestReg).addReg(DestReg);
+}
+
+void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
+ unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
- assert (MI.getOpcode() == ARM::LDR ||
- MI.getOpcode() == ARM::STR ||
- MI.getOpcode() == ARM::ADD);
+ while (!MI.getOperand(i).isFrameIndex()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getFrameIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize();
- unsigned FrameIdx = 1;
- unsigned OffIdx = 2;
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+ Offset -= AFI->getDPRCalleeSavedAreaOffset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+    // There are alloca()'s in this function; we must reference off the frame
+    // pointer instead.
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ }
+
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDescriptor &Desc = TII.get(Opcode);
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ bool isSub = false;
+
+ if (Opcode == ARM::ADDri) {
+ Offset += MI.getOperand(i+1).getImm();
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setInstrDescriptor(TII.get(ARM::MOVrr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ } else if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ MI.setInstrDescriptor(TII.get(ARM::SUBri));
+ }
- int FrameIndex = MI.getOperand(FrameIdx).getFrameIndex();
+ // Common case: small offset, fits into instruction.
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset);
+ if (ImmedOffset != -1) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
+ return;
+ }
+
+  // Otherwise, we fall back to common code below to form the imm offset with
+ // a sequence of ADDri instructions. First though, pull as much of the imm
+ // into this ADDri as possible.
+ unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+  unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+ // We will handle these bits from offset, clear them.
+ Offset &= ~ThisImmVal;
+
+ // Get the properly encoded SOImmVal field.
+ int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
+ assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
+ MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
+ } else if (Opcode == ARM::tADDrSPi) {
+ Offset += MI.getOperand(i+1).getImm();
+ assert((Offset & 3) == 0 &&
+ "add/sub sp, #imm immediate must be multiple of 4!");
+ Offset >>= 2;
+ if (Offset == 0) {
+ // Turn it into a move.
+ MI.setInstrDescriptor(TII.get(ARM::tMOVrr));
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.RemoveOperand(i+1);
+ return;
+ }
+
+ // Common case: small offset, fits into instruction.
+ if ((Offset & ~255U) == 0) {
+ // Replace the FrameIndex with sp / fp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ unsigned DestReg = MI.getOperand(0).getReg();
+ if (Offset > 0) {
+ // Translate r0 = add sp, imm to
+ // r0 = add sp, 255*4
+ // r0 = add r0, (imm - 255*4)
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(255);
+ Offset = (Offset - 255) << 2;
+ MachineBasicBlock::iterator NII = next(II);
+ emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII);
+ } else {
+ // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructions)
+ // r0 = add r0, sp
+ Offset <<= 2;
+ emitThumbConstant(MBB, II, DestReg, Offset, TII);
+ MI.setInstrDescriptor(TII.get(ARM::tADDhirr));
+ MI.getOperand(i).ChangeToRegister(DestReg, false);
+ MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ }
+ return;
+ } else {
+ unsigned ImmIdx = 0;
+ int InstrOffs = 0;
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
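+    // Offset field widths: AM2 uses 12 bits (byte offset), AM3 uses 8 bits
+    // (byte offset), and AM5 and Thumb sp-relative offsets use 8 bits scaled
+    // by 4 (word offset).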
+ switch (AddrMode) {
+ case ARMII::AddrMode2: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 12;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = i+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ break;
+ }
+ case ARMII::AddrMode5: {
+ ImmIdx = i+1;
+ InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs *= -1;
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrModeTs: {
+ ImmIdx = i+1;
+ InstrOffs = MI.getOperand(ImmIdx).getImm();
+ NumBits = 8;
+ Scale = 4;
+ break;
+ }
+ default:
+ std::cerr << "Unsupported addressing mode!\n";
+ abort();
+ break;
+ }
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(OffIdx).getImmedValue();
+ Offset += InstrOffs * Scale;
+ assert((Scale == 1 || (Offset & (Scale-1)) == 0) &&
+ "Can't encode this offset!");
+ if (Offset < 0) {
+ Offset = -Offset;
+ isSub = true;
+ }
- unsigned StackSize = MF.getFrameInfo()->getStackSize();
+ MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+ int ImmedOffset = Offset / Scale;
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale) {
+ // Replace the FrameIndex with sp
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ return;
+ }
- Offset += StackSize;
+ // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
+ ImmedOffset = ImmedOffset & Mask;
+ if (isSub)
+ ImmedOffset |= 1 << NumBits;
+ ImmOp.ChangeToImmediate(ImmedOffset);
+ Offset &= ~(Mask*Scale);
+ }
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above, handle the rest, providing a register that is
+ // SP+LargeImm.
+ assert(Offset && "This code isn't needed if offset already handled!");
- assert (Offset >= 0);
- unsigned BaseRegister = hasFP(MF) ? ARM::R11 : ARM::R13;
- if (Offset < 4096) {
- // Replace the FrameIndex with r13
- MI.getOperand(FrameIdx).ChangeToRegister(BaseRegister, false);
- // Replace the ldr offset with Offset
- MI.getOperand(OffIdx).ChangeToImmediate(Offset);
+ if (isThumb) {
+ if (TII.isLoad(Opcode)) {
+ // Use the destination register to materialize sp + offset.
+ unsigned TmpReg = MI.getOperand(0).getReg();
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg,
+ isSub ? -Offset : Offset, TII);
+ MI.getOperand(i).ChangeToRegister(TmpReg, false);
+ } else if (TII.isStore(Opcode)) {
+ // FIXME! This is horrific!!! We need register scavenging.
+      // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+      // also an ABI register so it's possible that it is the register being
+      // stored here. If that's the case, we do the following:
+      // r12 = r2
+      // Use r2 to materialize sp + offset
+      // str r3, r2
+      // r2 = r12
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = ARM::R3;
+ if (DestReg == ARM::R3) {
+ BuildMI(MBB, II, TII.get(ARM::tMOVrr), ARM::R12).addReg(ARM::R2);
+ TmpReg = ARM::R2;
+ }
+ emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg,
+ isSub ? -Offset : Offset, TII);
+      MI.getOperand(i).ChangeToRegister(TmpReg, false);
+      if (DestReg == ARM::R3)
+        BuildMI(MBB, next(II), TII.get(ARM::tMOVrr), ARM::R2).addReg(ARM::R12);
+ } else
+ assert(false && "Unexpected opcode!");
} else {
- // Insert a set of r12 with the full address
- // r12 = r13 + offset
- MachineBasicBlock *MBB2 = MI.getParent();
- splitInstructionWithImmediate(*MBB2, II, TII.get(ARM::ADD), ARM::R12,
- BaseRegister, Offset);
-
- // Replace the FrameIndex with r12
- MI.getOperand(FrameIdx).ChangeToRegister(ARM::R12, false);
+ // Insert a set of r12 with the full address: r12 = sp + offset
+ // If the offset we have is too large to fit into the instruction, we need
+ // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+ // out of 'Offset'.
+ emitARMRegPlusImmediate(MBB, II, ARM::R12, FrameReg,
+ isSub ? -Offset : Offset, TII);
+ MI.getOperand(i).ChangeToRegister(ARM::R12, false);
}
}
void ARMRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const {
+  // This tells PEI to spill the FP as if it is any other callee-save register
+  // to take advantage of the eliminateFrameIndex machinery. This also ensures
+  // it is spilled in the order specified by getCalleeSavedRegs() to make it
+  // easier to combine multiple loads / stores.
+ bool FramePtrSpilled = MF.getFrameInfo()->hasVarSizedObjects();
+ bool CS1Spilled = false;
+ bool LRSpilled = false;
+ unsigned NumGPRSpills = 0;
+ SmallVector<unsigned, 4> UnspilledCS1GPRs;
+ SmallVector<unsigned, 4> UnspilledCS2GPRs;
+ if (!FramePtrSpilled && NoFramePointerElim) {
+ // Don't spill FP if the frame can be eliminated. This is determined
+ // by scanning the callee-save registers to see if any is used.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ bool Spilled = false;
+ if (MF.isPhysRegUsed(Reg)) {
+ Spilled = true;
+ FramePtrSpilled = true;
+ } else {
+ // Check alias registers too.
+ for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+ if (MF.isPhysRegUsed(*Aliases)) {
+ Spilled = true;
+ FramePtrSpilled = true;
+ }
+ }
+ }
+
+ if (CSRegClasses[i] == &ARM::GPRRegClass) {
+ if (Spilled) {
+ NumGPRSpills++;
+
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (FramePtrSpilled) {
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ AFI->setFramePtrSpilled(true);
+
+    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+    // spill LR as well so we can fold BX_RET into the register restore (LDM).
+ if (!LRSpilled && CS1Spilled) {
+ MF.changePhyRegUsed(ARM::LR, true);
+ NumGPRSpills++;
+ UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+ UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ }
+
+    // If the stack and doubles are 8-byte aligned and we are spilling an odd
+    // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+    // between the integer and double callee-save areas.
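+    // e.g. spilling three GPRs (12 bytes) would leave the following DPR area
+    // only 4-byte aligned; spilling a fourth (16 bytes) keeps the doubles
+    // 8-byte aligned.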
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+ if (CS1Spilled && !UnspilledCS1GPRs.empty())
+ MF.changePhyRegUsed(UnspilledCS1GPRs.front(), true);
+ else
+ MF.changePhyRegUsed(UnspilledCS2GPRs.front(), true);
+ }
+ MF.changePhyRegUsed(FramePtr, true);
+ }
+}
+
+/// Move the iterator past the next batch of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int Opc, unsigned Area,
+ const ARMSubtarget &STI) {
+ while (MBBI != MBB.end() &&
+ MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFrameIndex()) {
+ if (Area != 0) {
+ bool Done = false;
+ unsigned Category = 0;
+ switch (MBBI->getOperand(0).getReg()) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ Category = 1;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ Category = STI.isDarwin() ? 2 : 1;
+ break;
+ case ARM::D8:
+ case ARM::D9:
+ case ARM::D10:
+ case ARM::D11:
+ case ARM::D12:
+ case ARM::D13:
+ case ARM::D14:
+ case ARM::D15:
+ Category = 3;
+ break;
+ default:
+ Done = true;
+ break;
+ }
+ if (Done || Category != Area)
+ break;
+ }
+
+ ++MBBI;
+ }
+}
void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
- int NumBytes = (int) MFI->getStackSize();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned NumBytes = MFI->getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- bool HasFP = hasFP(MF);
+  // Determine the size of each callee-save spill area and record which frame
+  // index belongs to which area.
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+ int FramePtrSpillFI = 0;
+ if (AFI->isFramePtrSpilled()) {
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, -VARegSaveSize, isThumb, TII);
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ int FI = CSI[i].getFrameIdx();
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ break;
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ if (Reg == FramePtr)
+ FramePtrSpillFI = FI;
+ if (STI.isDarwin()) {
+ AFI->addGPRCalleeSavedArea2Frame(FI);
+ GPRCS2Size += 4;
+ } else {
+ AFI->addGPRCalleeSavedArea1Frame(FI);
+ GPRCS1Size += 4;
+ }
+ break;
+ default:
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
+ }
+
+ if (!isThumb) {
+ // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+ emitSPUpdate(MBB, MBBI, -GPRCS1Size, isThumb, TII);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
+ } else {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH)
+ ++MBBI;
+ }
+
+ // Point FP to the stack slot that contains the previous FP.
+ BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), FramePtr)
+ .addFrameIndex(FramePtrSpillFI).addImm(0);
+
+ if (!isThumb) {
+    // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+    emitSPUpdate(MBB, MBBI, -GPRCS2Size, false, TII);
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+
+    // Build the new SUBri to adjust SP for the FP callee-save spill area.
+    emitSPUpdate(MBB, MBBI, -DPRCSSize, false, TII);
+ }
+ }
- if (MFI->hasCalls()) {
+  // If necessary, add one more SUBri to account for the call frame and/or
+  // local storage (the alloca area).
+ if (MFI->hasCalls())
// We reserve argument space for call sites in the function immediately on
// entry to the current function. This eliminates the need for add/sub
// brackets around call sites.
- NumBytes += MFI->getMaxCallFrameSize();
- }
-
- if (HasFP)
- // Add space for storing the FP
- NumBytes += 4;
-
- // Align to 8 bytes
- NumBytes = ((NumBytes + 7) / 8) * 8;
+    if (!MFI->hasVarSizedObjects())
+ NumBytes += MFI->getMaxCallFrameSize();
+ // Round the size to a multiple of the alignment.
+ NumBytes = (NumBytes+Align-1)/Align*Align;
MFI->setStackSize(NumBytes);
- if (NumBytes) {
- //sub sp, sp, #NumBytes
- splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::SUB), ARM::R13,
- ARM::R13, NumBytes);
- }
+ // Determine starting offsets of spill areas.
+ if (AFI->isFramePtrSpilled()) {
+ unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+ unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+ unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
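+    // Resulting layout, from the incoming SP (highest address) downward:
+    //   [GPR spill area 1][GPR spill area 2][DPR spill area][locals/call frame]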
+ AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+ AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+ AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+ AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+ NumBytes = DPRCSOffset;
+ if (NumBytes) {
+      // Insert the SP adjustment after all the callee-save spills.
+ if (!isThumb)
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+ emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII);
+ }
+ } else
+ emitSPUpdate(MBB, MBBI, -NumBytes, isThumb, TII);
+ AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+ AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+ AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
- if (HasFP) {
- BuildMI(MBB, MBBI, TII.get(ARM::STR))
- .addReg(ARM::R11).addReg(ARM::R13).addImm(0);
- BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R11).addReg(ARM::R13).addImm(0).
- addImm(ARMShift::LSL);
- }
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
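+/// isCSRestore - Returns true if the instruction reloads a callee-saved
+/// register from a frame index, i.e. it is part of the epilogue's restore
+/// sequence.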
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+ return ((MI->getOpcode() == ARM::FLDD ||
+ MI->getOpcode() == ARM::LDR ||
+ MI->getOpcode() == ARM::tLDRspi) &&
+ MI->getOperand(1).isFrameIndex() &&
+ isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
}
void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = prior(MBB.end());
- assert(MBBI->getOpcode() == ARM::bx &&
+ assert((MBBI->getOpcode() == ARM::BX_RET ||
+ MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET) &&
"Can only insert epilog into returning blocks");
MachineFrameInfo *MFI = MF.getFrameInfo();
- int NumBytes = (int) MFI->getStackSize();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ bool isThumb = AFI->isThumbFunction();
+ unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ int NumBytes = (int)MFI->getStackSize();
+ if (AFI->isFramePtrSpilled()) {
+ // Unwind MBBI to point to first LDR / FLDD.
+ const unsigned *CSRegs = getCalleeSavedRegs();
+ if (MBBI != MBB.begin()) {
+ do
+ --MBBI;
+ while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+ if (!isCSRestore(MBBI, CSRegs))
+ ++MBBI;
+ }
- if (hasFP(MF)) {
- BuildMI(MBB, MBBI, TII.get(ARM::MOV), ARM::R13).addReg(ARM::R11).addImm(0).
- addImm(ARMShift::LSL);
- BuildMI(MBB, MBBI, TII.get(ARM::LDR), ARM::R11).addReg(ARM::R13).addImm(0);
- }
+ // Move SP to start of FP callee save spill area.
+ NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+ AFI->getGPRCalleeSavedArea2Size() +
+ AFI->getDPRCalleeSavedAreaSize());
+ if (isThumb)
+      emitSPUpdate(MBB, MBBI, NumBytes, isThumb, TII);
+ else {
+ NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot.
+ if (AFI->getGPRCalleeSavedArea2Size() ||
+ AFI->getDPRCalleeSavedAreaSize() ||
+ AFI->getDPRCalleeSavedAreaOffset())
+ if (NumBytes)
+ BuildMI(MBB, MBBI, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+ .addImm(NumBytes);
+ else
+ BuildMI(MBB, MBBI, TII.get(isThumb ? ARM::tMOVrr : ARM::MOVrr),
+ ARM::SP).addReg(FramePtr);
- if (NumBytes){
- //add sp, sp, #NumBytes
- splitInstructionWithImmediate(MBB, MBBI, TII.get(ARM::ADD), ARM::R13,
- ARM::R13, NumBytes);
- }
+ // Move SP to start of integer callee save spill area 2.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), false, TII);
+ // Move SP to start of integer callee save spill area 1.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), false, TII);
+
+    // Restore SP to its value upon entry to the function.
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+ emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), false, TII);
+ }
+
+ if (VARegSaveSize)
+ emitSPUpdate(MBB, MBBI, VARegSaveSize, isThumb, TII);
+ } else if (NumBytes != 0) {
+ emitSPUpdate(MBB, MBBI, NumBytes, isThumb, TII);
+ }
}
unsigned ARMRegisterInfo::getRARegister() const {
- return ARM::R14;
+ return ARM::LR;
}
unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
- return hasFP(MF) ? ARM::R11 : ARM::R13;
+ return STI.useThumbBacktraces() ? ARM::R7 : ARM::R11;
}
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 16c277ed92..6b9ac39a41 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -17,18 +17,36 @@
#include "llvm/Target/MRegisterInfo.h"
#include "ARMGenRegisterInfo.h.inc"
+#include <set>
namespace llvm {
-
-class Type;
-class TargetInstrInfo;
+ class TargetInstrInfo;
+ class ARMSubtarget;
+ class Type;
struct ARMRegisterInfo : public ARMGenRegisterInfo {
const TargetInstrInfo &TII;
+ const ARMSubtarget &STI;
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+public:
+ ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
- ARMRegisterInfo(const TargetInstrInfo &tii);
+ /// getRegisterNumbering - Given the enum value for some register, e.g.
+ /// ARM::LR, return the number that it corresponds to (e.g. 14).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
/// Code Generation virtual methods...
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, int FrameIndex,
@@ -43,9 +61,8 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const;
- virtual MachineInstr* foldMemoryOperand(MachineInstr* MI,
- unsigned OpNum,
- int FrameIndex) const;
+ MachineInstr* foldMemoryOperand(MachineInstr* MI, unsigned OpNum,
+ int FrameIndex) const;
const unsigned *getCalleeSavedRegs() const;
@@ -57,7 +74,7 @@ struct ARMRegisterInfo : public ARMGenRegisterInfo {
void eliminateFrameIndex(MachineBasicBlock::iterator II) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 24f53d9331..51d52c6971 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,129 +13,169 @@
//===----------------------------------------------------------------------===//
// Registers are identified with 4-bit ID numbers.
-class ARMReg<string n> : Register<n> {
+class ARMReg<bits<4> num, string n, list<Register> aliases = []> : Register<n> {
+  field bits<4> Num;
+  let Num = num;
let Namespace = "ARM";
+ let Aliases = aliases;
}
-// Ri - 32-bit integer registers
-class Ri<bits<4> num, string n> : ARMReg<n> {
- field bits<4> Num;
- let Num = num;
-}
-// Rf - 32-bit floating-point registers
-class Rf<bits<5> num, string n> : ARMReg<n> {
- field bits<5> Num;
- let Num = num;
-}
-// Rd - Slots in the FP register file for 64-bit floating-point values.
-class Rd<bits<5> num, string n, list<Register> aliases> : ARMReg<n> {
+class ARMFReg<bits<5> num, string n> : Register<n> {
field bits<5> Num;
+  let Num = num;
- let Aliases = aliases;
+ let Namespace = "ARM";
}
// Integer registers
-def R0 : Ri< 0, "R0">, DwarfRegNum<0>;
-def R1 : Ri< 1, "R1">, DwarfRegNum<1>;
-def R2 : Ri< 2, "R2">, DwarfRegNum<2>;
-def R3 : Ri< 3, "R3">, DwarfRegNum<3>;
-def R4 : Ri< 4, "R4">, DwarfRegNum<4>;
-def R5 : Ri< 5, "R5">, DwarfRegNum<5>;
-def R6 : Ri< 6, "R6">, DwarfRegNum<6>;
-def R7 : Ri< 7, "R7">, DwarfRegNum<7>;
-def R8 : Ri< 8, "R8">, DwarfRegNum<8>;
-def R9 : Ri< 9, "R9">, DwarfRegNum<9>;
-def R10 : Ri<10, "R10">, DwarfRegNum<10>;
-def R11 : Ri<11, "R11">, DwarfRegNum<11>;
-def R12 : Ri<12, "R12">, DwarfRegNum<12>;
-def R13 : Ri<13, "R13">, DwarfRegNum<13>;
-def R14 : Ri<14, "R14">, DwarfRegNum<14>;
-def R15 : Ri<15, "R15">, DwarfRegNum<15>;
+def R0 : ARMReg< 0, "r0">, DwarfRegNum<0>;
+def R1 : ARMReg< 1, "r1">, DwarfRegNum<1>;
+def R2 : ARMReg< 2, "r2">, DwarfRegNum<2>;
+def R3 : ARMReg< 3, "r3">, DwarfRegNum<3>;
+def R4 : ARMReg< 4, "r4">, DwarfRegNum<4>;
+def R5 : ARMReg< 5, "r5">, DwarfRegNum<5>;
+def R6 : ARMReg< 6, "r6">, DwarfRegNum<6>;
+def R7 : ARMReg< 7, "r7">, DwarfRegNum<7>;
+def R8 : ARMReg< 8, "r8">, DwarfRegNum<8>;
+def R9 : ARMReg< 9, "r9">, DwarfRegNum<9>;
+def R10 : ARMReg<10, "r10">, DwarfRegNum<10>;
+def R11 : ARMReg<11, "r11">, DwarfRegNum<11>;
+def R12 : ARMReg<12, "r12">, DwarfRegNum<12>;
+def SP : ARMReg<13, "sp">, DwarfRegNum<13>;
+def LR : ARMReg<14, "lr">, DwarfRegNum<14>;
+def PC : ARMReg<15, "pc">, DwarfRegNum<15>;
-// TODO: update to VFP-v3
-// Floating-point registers
-def S0 : Rf< 0, "S0">, DwarfRegNum<64>;
-def S1 : Rf< 1, "S1">, DwarfRegNum<65>;
-def S2 : Rf< 2, "S2">, DwarfRegNum<66>;
-def S3 : Rf< 3, "S3">, DwarfRegNum<67>;
-def S4 : Rf< 4, "S4">, DwarfRegNum<68>;
-def S5 : Rf< 5, "S5">, DwarfRegNum<69>;
-def S6 : Rf< 6, "S6">, DwarfRegNum<70>;
-def S7 : Rf< 7, "S7">, DwarfRegNum<71>;
-def S8 : Rf< 8, "S8">, DwarfRegNum<72>;
-def S9 : Rf< 9, "S9">, DwarfRegNum<73>;
-def S10 : Rf<10, "S10">, DwarfRegNum<74>;
-def S11 : Rf<11, "S11">, DwarfRegNum<75>;
-def S12 : Rf<12, "S12">, DwarfRegNum<76>;
-def S13 : Rf<13, "S13">, DwarfRegNum<77>;
-def S14 : Rf<14, "S14">, DwarfRegNum<78>;
-def S15 : Rf<15, "S15">, DwarfRegNum<79>;
-def S16 : Rf<16, "S16">, DwarfRegNum<80>;
-def S17 : Rf<17, "S17">, DwarfRegNum<81>;
-def S18 : Rf<18, "S18">, DwarfRegNum<82>;
-def S19 : Rf<19, "S19">, DwarfRegNum<83>;
-def S20 : Rf<20, "S20">, DwarfRegNum<84>;
-def S21 : Rf<21, "S21">, DwarfRegNum<85>;
-def S22 : Rf<22, "S22">, DwarfRegNum<86>;
-def S23 : Rf<23, "S23">, DwarfRegNum<87>;
-def S24 : Rf<24, "S24">, DwarfRegNum<88>;
-def S25 : Rf<25, "S25">, DwarfRegNum<89>;
-def S26 : Rf<26, "S26">, DwarfRegNum<90>;
-def S27 : Rf<27, "S27">, DwarfRegNum<91>;
-def S28 : Rf<28, "S28">, DwarfRegNum<92>;
-def S29 : Rf<29, "S29">, DwarfRegNum<93>;
-def S30 : Rf<30, "S30">, DwarfRegNum<94>;
-def S31 : Rf<31, "S31">, DwarfRegNum<95>;
+// Float registers
+def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
+def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">;
+def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">;
+def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">;
+def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">;
+def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">;
+def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">;
+def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">;
+def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">;
+def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">;
+def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">;
+def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">;
+def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
+def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
+def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
+def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
-// Aliases of the S* registers used to hold 64-bit fp values (doubles)
-def D0 : Rd< 0, "D0", [S0, S1]>, DwarfRegNum<64>;
-def D1 : Rd< 2, "D1", [S2, S3]>, DwarfRegNum<66>;
-def D2 : Rd< 4, "D2", [S4, S5]>, DwarfRegNum<68>;
-def D3 : Rd< 6, "D3", [S6, S7]>, DwarfRegNum<70>;
-def D4 : Rd< 8, "D4", [S8, S9]>, DwarfRegNum<72>;
-def D5 : Rd<10, "D5", [S10, S11]>, DwarfRegNum<74>;
-def D6 : Rd<12, "D6", [S12, S13]>, DwarfRegNum<76>;
-def D7 : Rd<14, "D7", [S14, S15]>, DwarfRegNum<78>;
-def D8 : Rd<16, "D8", [S16, S17]>, DwarfRegNum<80>;
-def D9 : Rd<18, "D9", [S18, S19]>, DwarfRegNum<82>;
-def D10 : Rd<20, "D10", [S20, S21]>, DwarfRegNum<84>;
-def D11 : Rd<22, "D11", [S22, S23]>, DwarfRegNum<86>;
-def D12 : Rd<24, "D12", [S24, S25]>, DwarfRegNum<88>;
-def D13 : Rd<26, "D13", [S26, S27]>, DwarfRegNum<90>;
-def D14 : Rd<28, "D14", [S28, S29]>, DwarfRegNum<92>;
-def D15 : Rd<30, "D15", [S30, S31]>, DwarfRegNum<94>;
+// Aliases of the S* registers used to hold 64-bit fp values (doubles)
+def D0 : ARMReg< 0, "d0", [S0, S1]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>;
// Register classes.
//
-// FIXME: the register order should be defined in terms of the preferred
-// allocation order...
+// pc == Program Counter
+// lr == Link Register
+// sp == Stack Pointer
+// r12 == ip (scratch)
+// r7 == Frame Pointer (thumb-style backtraces)
+// r11 == Frame Pointer (arm-style backtraces)
+// r10 == Stack Limit
//
-def IntRegs : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
- R7, R8, R9, R10, R11, R12,
- R13, R14, R15]> {
+def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R12, R11,
+ LR, SP, PC]> {
let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
+ // FIXME: We are reserving r12 in case the PEI needs to use it to
+ // generate large stack offset. Make it available once we have register
+ // scavenging.
let MethodBodies = [{
- IntRegsClass::iterator
- IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
- // r15 == Program Counter
- // r14 == Link Register
- // r13 == Stack Pointer
- // r12 == ip (scratch)
- // r11 == Frame Pointer
- // r10 == Stack Limit
- if (hasFP(MF))
- return end() - 5;
- else
- return end() - 4;
+ // FP is R11, R9 is available.
+ static const unsigned ARM_GPR_AO_1[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::LR, ARM::R11 };
+ // FP is R11, R9 is not available.
+ static const unsigned ARM_GPR_AO_2[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R10,
+ ARM::LR, ARM::R11 };
+ // FP is R7, R9 is available.
+ static const unsigned ARM_GPR_AO_3[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R9, ARM::R10,ARM::R11,
+ ARM::LR, ARM::R7 };
+ // FP is R7, R9 is not available.
+ static const unsigned ARM_GPR_AO_4[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R10,ARM::R11,
+ ARM::LR, ARM::R7 };
+ // FP is R7, only low registers available.
+    static const unsigned THUMB_GPR_AO[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+      ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
+
+ GPRClass::iterator
+ GPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb())
+ return THUMB_GPR_AO;
+ if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_4;
+ else
+ return ARM_GPR_AO_3;
+ } else {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_AO_2;
+ else
+ return ARM_GPR_AO_1;
+ }
+ }
+
+ GPRClass::iterator
+ GPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+ if (Subtarget.isThumb())
+ I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ else if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isR9Reserved())
+ I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
+ else
+ I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
+ } else {
+ if (Subtarget.isR9Reserved())
+ I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
+ else
+ I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
+ }
+
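+    // Each allocation order ends with the frame pointer register (R11 or R7),
+    // so drop the last entry when the function uses a frame pointer.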
+ return hasFP(MF) ? I-1 : I;
}
}];
}
-def FPRegs : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
+def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
-def DFPRegs : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15]>;
+// ARM requires only word alignment for doubles, though they perform better
+// when double-word aligned.
+def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
new file mode 100644
index 0000000000..35bb9accc3
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -0,0 +1,52 @@
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMSubtarget.h"
+#include "ARMGenSubtarget.inc"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// FIXME: this is temporary.
+static cl::opt<bool> Thumb("enable-thumb",
+ cl::desc("Switch to thumb mode in ARM backend"));
+
+ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS)
+ : ARMArchVersion(V4T), HasVFP2(false), IsDarwin(false),
+ UseThumbBacktraces(false), IsR9Reserved(false), stackAlignment(8) {
+
+ // Determine default and user specified characteristics
+ std::string CPU = "generic";
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+
+ IsThumb = Thumb;
+
+  // Determine whether the target is Darwin based on the target triple; when
+  // the triple is empty, fall back to the host platform.
+ const std::string& TT = M.getTargetTriple();
+ if (TT.length() > 5) {
+ IsDarwin = TT.find("-darwin") != std::string::npos;
+ } else if (TT.empty()) {
+#if defined(__APPLE__)
+ IsDarwin = true;
+#endif
+ }
+
+ if (IsDarwin) {
+ UseThumbBacktraces = true;
+ IsR9Reserved = true;
+ stackAlignment = 4;
+ }
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
new file mode 100644
index 0000000000..d5c4b5eb03
--- /dev/null
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -0,0 +1,82 @@
+//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Evan Cheng and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMSUBTARGET_H
+#define ARMSUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+class Module;
+
+class ARMSubtarget : public TargetSubtarget {
+protected:
+ enum ARMArchEnum {
+ V4T, V5T, V5TE, V6
+ };
+
+  /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE,
+  /// and V6.
+ ARMArchEnum ARMArchVersion;
+
+ /// HasVFP2 - True if the processor supports Vector Floating Point (VFP) V2
+ /// instructions.
+ bool HasVFP2;
+
+ /// IsThumb - True if we are in thumb mode, false if in ARM mode.
+ bool IsThumb;
+
+ bool IsDarwin;
+
+ /// UseThumbBacktraces - True if we use thumb style backtraces.
+ bool UseThumbBacktraces;
+
+  /// IsR9Reserved - True if R9 is not available as a general purpose register.
+ bool IsR9Reserved;
+
+  /// stackAlignment - The minimum alignment known to hold for the stack frame
+  /// on entry to the function, which must be maintained by every function.
+ unsigned stackAlignment;
+
+ public:
+ /// This constructor initializes the data members to match that
+ /// of the specified module.
+ ///
+ ARMSubtarget(const Module &M, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
+
+ bool hasV4TOps() const { return ARMArchVersion >= V4T; }
+ bool hasV5TOps() const { return ARMArchVersion >= V5T; }
+ bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
+ bool hasV6Ops() const { return ARMArchVersion >= V6; }
+
+ bool hasVFP2() const { return HasVFP2; }
+
+ bool isDarwin() const { return IsDarwin; }
+ bool isThumb() const { return IsThumb; }
+
+ bool useThumbBacktraces() const { return UseThumbBacktraces; }
+ bool isR9Reserved() const { return IsR9Reserved; }
+
+  /// getStackAlignment - Returns the minimum alignment known to hold for the
+  /// stack frame on entry to the function, which must be maintained by every
+  /// function for this subtarget.
+ unsigned getStackAlignment() const { return stackAlignment; }
+};
+} // End llvm namespace
+
+#endif // ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index a1c623c85a..65848e5376 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -12,18 +12,50 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetAsmInfo.h"
-
+#include "ARMTargetMachine.h"
using namespace llvm;
ARMTargetAsmInfo::ARMTargetAsmInfo(const ARMTargetMachine &TM) {
- Data16bitsDirective = "\t.half\t";
- Data32bitsDirective = "\t.word\t";
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isDarwin()) {
+ HasDotTypeDotSizeDirective = false;
+ PrivateGlobalPrefix = "L";
+ GlobalPrefix = "_";
+ ZeroDirective = "\t.space\t";
+ SetDirective = "\t.set";
+ WeakRefDirective = "\t.weak_reference\t";
+ JumpTableDataSection = ".const";
+ CStringSection = "\t.cstring";
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ InlineAsmStart = "@ InlineAsm Start";
+ InlineAsmEnd = "@ InlineAsm End";
+ LCOMMDirective = "\t.lcomm\t";
+ COMMDirectiveTakesAlignment = false;
+
+ NeedsSet = true;
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ } else {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ ZeroDirective = "\t.skip\t";
+ WeakRefDirective = "\t.weak\t";
+ StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
+ StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
+ }
+ AlignmentIsInBytes = false;
Data64bitsDirective = 0;
- ZeroDirective = "\t.skip\t";
CommentString = "@";
+ DataSection = "\t.data";
ConstantPoolSection = "\t.text\n";
- AlignmentIsInBytes = false;
- WeakRefDirective = "\t.weak\t";
- StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
- StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
}
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 4328ca8465..195689e059 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,30 +11,32 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetAsmInfo.h"
#include "ARMTargetMachine.h"
+#include "ARMTargetAsmInfo.h"
#include "ARMFrameInfo.h"
#include "ARM.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
+ cl::desc("Disable load store optimization pass"));
+
namespace {
// Register the target.
RegisterTarget<ARMTargetMachine> X("arm", " ARM");
}
-
-const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
- return new ARMTargetAsmInfo(*this);
-}
-
-
/// TargetMachine ctor - Create an ILP32 architecture model
///
ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
- : DataLayout("e-p:32:32") {
+ : Subtarget(M, FS), DataLayout("e-p:32:32-d:32"), InstrInfo(Subtarget),
+ FrameInfo(Subtarget) {
+ if (Subtarget.isDarwin())
+ NoFramePointerElim = true;
}
unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
@@ -49,14 +51,23 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
}
+const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+ return new ARMTargetAsmInfo(*this);
+}
+
+
// Pass Pipeline Configuration
bool ARMTargetMachine::addInstSelector(FunctionPassManager &PM, bool Fast) {
PM.add(createARMISelDag(*this));
return false;
}
-bool ARMTargetMachine::addPostRegAlloc(FunctionPassManager &PM, bool Fast) {
- PM.add(createARMFixMulPass());
+bool ARMTargetMachine::addPreEmitPass(FunctionPassManager &PM, bool Fast) {
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (!Fast && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass());
+
+ PM.add(createARMConstantIslandPass());
return true;
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index b9a3d9809d..9c888ea395 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -20,19 +20,17 @@
#include "llvm/Target/TargetFrameInfo.h"
#include "ARMInstrInfo.h"
#include "ARMFrameInfo.h"
+#include "ARMSubtarget.h"
namespace llvm {
class Module;
class ARMTargetMachine : public LLVMTargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
- ARMInstrInfo InstrInfo;
- ARMFrameInfo FrameInfo;
-
-protected:
- virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
public:
ARMTargetMachine(const Module &M, const std::string &FS);
@@ -42,11 +40,14 @@ public:
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
+ virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
static unsigned getModuleMatchQuality(const Module &M);
+ virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
// Pass Pipeline Configuration
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
- virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
+ virtual bool addPreEmitPass(FunctionPassManager &PM, bool Fast);
virtual bool addAssemblyEmitter(FunctionPassManager &PM, bool Fast,
std::ostream &Out);
};
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 4310b8c8e4..77300a1480 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -7,6 +7,7 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
+
LEVEL = ../../..
LIBRARYNAME = LLVMARM
TARGET = ARM
@@ -15,7 +16,6 @@ TARGET = ARM
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
- ARMGenDAGISel.inc
+ ARMGenDAGISel.inc ARMGenSubtarget.inc
include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
new file mode 100644
index 0000000000..e9e943d0ae
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -0,0 +1,17 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb specific).
+//===---------------------------------------------------------------------===//
+
+* Add support for compiling functions in both ARM and Thumb mode, then taking
+ the smallest.
+* Add support for compiling individual basic blocks in thumb mode, when in a
+ larger ARM function. This can be used for presumed cold code, like paths
+ to abort (failure path of asserts), EH handling code, etc.
+
+* Thumb doesn't have normal pre/post increment addressing modes, but you can
+  load/store 32-bit integers with pre/post increment by using load/store
+  multiple instructions with a single register, e.g. "ldmia r0!, {r1}".
+
+* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add
+ and cmp instructions can use high registers. Also, we can use them as
+ temporaries to spill values into.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0f79322b63..000e8e6450 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -2,69 +2,438 @@
// Random ideas for the ARM backend.
//===---------------------------------------------------------------------===//
-Consider implementing a select with two conditional moves:
+Reimplement 'select' in terms of 'SEL'.
-cmp x, y
-moveq dst, a
-movne dst, b
+* We would really like to support UXTAB16, but we need to prove that the
+  add doesn't need to overflow between the two 16-bit chunks (see the sketch
+  after this list).
-----------------------------------------------------------
+* Implement predication support.
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+  immediate instructions, then copying to the FPU. Would that be slower than
+  a load into the FPU?
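+
+For the UXTAB16 item, here is a minimal C model of what the instruction
+computes (ignoring the optional rotation of its second operand); matching it
+from a plain 32-bit add requires proving the low-half sum cannot carry into
+the high half:
+
+unsigned uxtab16(unsigned rn, unsigned rm) {
+  unsigned lo = ((rn & 0xFFFF) + (rm & 0xFF)) & 0xFFFF;      /* low half  */
+  unsigned hi = ((rn >> 16) + ((rm >> 16) & 0xFF)) & 0xFFFF; /* high half */
+  return (hi << 16) | lo;
+}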
-%tmp1 = shl int %b, ubyte %c
-%tmp4 = add int %a, %tmp1
+//===---------------------------------------------------------------------===//
+
+The constant island pass is extremely naive. If a constant pool entry is
+out of range, it *always* splits a block and inserts a copy of the cp
+entry inline. It should:
+
+1. Check to see if there is already a copy of this constant nearby. If so,
+ reuse it.
+2. Instead of always splitting blocks to insert the constant, insert it in
+ nearby 'water'.
+3. Constant island references should be ref counted. If a constant reference
+ is out-of-range, and the last reference to a constant is relocated, the
+ dead constant should be removed.
+
+This pass has all the framework needed to implement this, but it hasn't
+been done.
+
+//===---------------------------------------------------------------------===//
+
+We need to start generating predicated instructions. The .td files have a way
+to express this now (see the PPC conditional return instruction), but the
+branch folding pass (or a new if-cvt pass) should start producing these, at
+least in the trivial case.
+
+Among the obvious wins, doing so can eliminate the need to custom expand
+copysign (the conditional negate comes for free with a predicated negate).
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
+
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3 operation sequence:
+ t = (p sra 31);
+ res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
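+
+A minimal C sketch of that branchless form (assuming arithmetic right shift
+on signed int, which ARM's asr provides):
+
+int iabs(int p) {
+  int t = p >> 31;     /* t = -1 if p < 0, 0 otherwise */
+  return (p ^ t) - t;  /* xor + subtract negates exactly when t == -1 */
+}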
+
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+ when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into an ldm, since ldm loads the lowest-numbered register
+from the lowest address and here the lower address feeds the higher register
+(r5). Perhaps we will need to do the transformation before register allocation,
+then teach the register allocator to allocate a chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
-compiles to
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
-add r0, r0, r1, lsl r2
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
-but
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
-%tmp1 = shl int %b, ubyte %c
-%tmp4 = add int %tmp1, %a
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
-compiles to
-mov r1, r1, lsl r2
-add r0, r1, r0
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
----------------------------------------------------------
-%tmp1 = shl int %b, ubyte 4
-%tmp2 = add int %a, %tmp1
+ Then the ldr's can be combined into a single ldm. See Olden/power.
-compiles to
+Note that for ARM v4, gcc uses ldmia to load a pair of 32-bit values that
+represent a 64-bit double FP constant:
-mov r2, #4
-add r0, r0, r1, lsl r2
+ adr r0, L6
+ ldmia r0, {r0-r1}
-should be
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
-add r0, r0, r1, lsl #4
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd instead if there are only two destination registers that form an
+odd/even pair. However, we would probably pay a penalty if the address is not
+aligned on an 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.
-----------------------------------------------------------
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+ double r = foo(3.1);
+ return x+r;
+}
+
+_bar:
+ sub sp, sp, #16
+ str r4, [sp, #+12]
+ str r5, [sp, #+8]
+ str lr, [sp, #+4]
+ mov r4, r0
+ mov r5, r1
+ ldr r0, LCPI2_0
+ bl _foo
+ fmsr f0, r0
+ fcvtsd d0, f0
+ fmdrr d1, r4, r5
+ faddd d0, d0, d1
+ fmrrd r0, r1, d0
+ ldr lr, [sp, #+4]
+ ldr r5, [sp, #+8]
+ ldr r4, [sp, #+12]
+ add sp, sp, #16
+ bx lr
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-save registers and the fact that they are only being used
+by the fmdrr instruction. It would have been better had the fmdrr been
+scheduled before the call and placed the result in a callee-save DPR register;
+the two mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
-add an offset to FLDS/FLDD/FSTD/FSTS addressing mode
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them into r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
-----------------------------------------------------------
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
-the function
+Instead of a stmia, ldmia, and a ldr, wouldn't it be better to do three moves?
-void %f() {
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+ struct s S = {1.1, 2};
+ return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller giving the address to
+store the value in. The first ldmia loads the constants into r0, r1, r2. The
+last stmia stores r0, r1, r2 into the address passed in. However, there is one
+additional stmia that stores r0, r1, and r2 to some stack location; that store
+is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
entry:
- call void %g( int 1, int 2, int 3, int 4, int 5 )
+ %S = alloca %struct.s, align 4 ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte* ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte* ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte* ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
ret void
}
-declare void %g(int, int, int, int, int)
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
-Only needs 8 bytes of stack space. We currently allocate 16.
+A feasible temporary solution is to use specific physical registers at lowering
+time for small (<= 4 words?) transfer sizes.
-----------------------------------------------------------
+* ARM CSRet calling convention requires the hidden argument to be returned by
+the callee.
-32 x 32 -> 64 multiplications currently uses two instructions. We
-should try to declare smull and umull as returning two values.
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
-----------------------------------------------------------
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
-Implement addressing modes 2 (ldrb) and 3 (ldrsb)
+We need register scavenging. Currently, the 'ip' register is reserved in case
+frame indexes are too big. This means that we generate extra code for stuff
+like this:
-----------------------------------------------------------
+void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) {
+ short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
+ *a = x * Rconst;
+ *b = y * Rconst;
+ *c = z * Rconst;
+}
+
+we compile it to:
+
+_foo:
+*** stmfd sp!, {r4, r7}
+*** add r7, sp, #4
+ mov r4, #186
+ orr r4, r4, #89, 24 @ 22784
+ mul r0, r0, r4
+ str r0, [r3]
+ mul r0, r1, r4
+ ldr r1, [sp, #+8]
+ str r0, [r1]
+ mul r0, r2, r4
+ ldr r1, [sp, #+12]
+ str r0, [r1]
+*** sub sp, r7, #4
+*** ldmfd sp!, {r4, r7}
+ bx lr
+
+GCC produces:
+
+_foo:
+ ldr ip, L4
+ mul r0, ip, r0
+ mul r1, ip, r1
+ str r0, [r3, #0]
+ ldr r3, [sp, #0]
+ mul r2, ip, r2
+ str r1, [r3, #0]
+ ldr r3, [sp, #4]
+ str r2, [r3, #0]
+ bx lr
+L4:
+ .long 22970
+
+This is apparently all because we couldn't use ip here.
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base
+ptr is guaranteed to be live beyond the load/store. This can happen if the
+base ptr is live out of the block in which we are performing the optimization,
+e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable at DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub + load/store
+ to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we add support for multiple-result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
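+A C sketch of the encodability test behind this (isRotImm8 is a hypothetical
+helper, not an existing API): an ARM data-processing immediate is an 8-bit
+value rotated right by an even amount. 0xffff8000 fails the test, while both
+0x3f8000 and 0x400000 pass, which is why the mov/sub pair above works:
+
+#include <stdint.h>
+
+static int isRotImm8(uint32_t V) {
+  for (unsigned R = 0; R < 32; R += 2) {
+    /* Rotating V left by R undoes a rotate-right-by-R encoding. */
+    uint32_t Rot = R ? ((V << R) | (V >> (32 - R))) : V;
+    if (Rot <= 0xFF)
+      return 1;
+  }
+  return 0;
+}
+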
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function.
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+ int i = 0;
+
+ if (StackPtr != 0) {
+ while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+ Line[i++] = Stack[--StackPtr];
+ if (LineLen > 32768)
+ {
+ while (StackPtr != 0 && i < LineLen)
+ {
+ i++;
+ --StackPtr;
+ }
+ }
+ }
+ return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//