diff options
author | Evan Cheng <evan.cheng@apple.com> | 2009-07-28 05:48:47 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2009-07-28 05:48:47 +0000 |
commit | 6495f63945e8dbde81f03a1dc2ab421993b9a495 (patch) | |
tree | 3938678849c493091334cec9e5e426962ea88090 | |
parent | f4b64f67b6d397cedcb8a9c0539f62957cca1627 (diff) |
- More refactoring. This gets rid of all of the getOpcode calls.
- This change also makes it possible to switch between ARM / Thumb on a
per-function basis.
- Fixed the thumb2 routine which expands reg + arbitrary immediate. It was
using ARM so_imm logic.
- Use movw and movt to do reg + imm when profitable.
- Other code cleanups and minor optimizations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77300 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMAddressingModes.h | 11 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 158 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.h | 103 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 271 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.h | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.cpp | 13 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 15 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1InstrInfo.cpp | 12 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1InstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1RegisterInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1RegisterInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2InstrInfo.cpp | 329 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2InstrInfo.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2RegisterInfo.cpp | 211 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2RegisterInfo.h | 13 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/large-stack.ll | 28 |
17 files changed, 635 insertions, 566 deletions
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 5bbf35d296..f85cb54c88 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -274,6 +274,7 @@ namespace ARM_AM { return V >> getThumbImmValShift(V); } + /// getT2SOImmValSplat - Return the 12-bit encoded representation /// if the specified value can be obtained by splatting the low 8 bits /// into every other byte or every byte of a 32-bit value. i.e., @@ -283,7 +284,7 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValSplat(unsigned V) { + static inline int getT2SOImmValSplatVal(unsigned V) { unsigned u, Vs, Imm; // control = 0 if ((V & 0xffffff00) == 0) @@ -307,11 +308,11 @@ namespace ARM_AM { return -1; } - /// getT2SOImmValRotate - Return the 12-bit encoded representation if the + /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the /// specified value is a rotated 8-bit value. Return -1 if no rotation /// encoding is possible. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValRotate (unsigned V) { + static inline int getT2SOImmValRotateVal(unsigned V) { unsigned RotAmt = CountLeadingZeros_32(V); if (RotAmt >= 24) return -1; @@ -329,12 +330,12 @@ namespace ARM_AM { /// See ARM Reference Manual A6.3.2. static inline int getT2SOImmVal(unsigned Arg) { // If 'Arg' is an 8-bit splat, then get the encoded value. - int Splat = getT2SOImmValSplat(Arg); + int Splat = getT2SOImmValSplatVal(Arg); if (Splat != -1) return Splat; // If 'Arg' can be handled with a single shifter_op return the value. 
- int Rot = getT2SOImmValRotate(Arg); + int Rot = getT2SOImmValRotateVal(Arg); if (Rot != -1) return Rot; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6e1f5580c2..89127edbe3 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -31,8 +31,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &sti) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), - STI(sti) { + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { } MachineInstr * @@ -290,10 +289,12 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, const SmallVectorImpl<MachineOperand> &Cond) const { // FIXME this should probably have a DebugLoc argument DebugLoc dl = DebugLoc::getUnknownLoc(); - int BOpc = !STI.isThumb() - ? ARM::B : (STI.isThumb2() ? ARM::t2B : ARM::tB); - int BccOpc = !STI.isThumb() - ? ARM::Bcc : (STI.isThumb2() ? ARM::t2Bcc : ARM::tBcc); + + ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); + int BOpc = !AFI->isThumbFunction() + ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); + int BccOpc = !AFI->isThumbFunction() + ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); // Shouldn't be a fall through. 
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -785,7 +786,7 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, return false; } -int ARMBaseInstrInfo::getMatchingCondBranchOpcode(int Opc) const { +int llvm::getMatchingCondBranchOpcode(int Opc) { if (Opc == ARM::B) return ARM::Bcc; else if (Opc == ARM::tB) @@ -797,3 +798,146 @@ int ARMBaseInstrInfo::getMatchingCondBranchOpcode(int Opc) const { return 0; } + +void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +int llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + const ARMBaseInstrInfo &TII) { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrMode2; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + if (Offset == 0) { + // Turn it into a move. 
+ MI.setDesc(TII.get(ARM::MOVr)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(FrameRegIdx+1); + return 0; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::SUBri)); + } + + // Common case: small offset, fits into instruction. + if (ARM_AM::getSOImmVal(Offset) != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + return 0; + } + + // Otherwise, pull as much of the immedidate into this ADDri/SUBri + // as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && + "Bit extraction didn't work?"); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode2: { + ImmIdx = FrameRegIdx+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = FrameRegIdx+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode5: { + ImmIdx = FrameRegIdx+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & 
(Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0) { + Offset = -Offset; + isSub = true; + } + + // Attempt to fold address comp. if opcode has offset bits + if (NumBits > 0) { + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + return 0; + } + + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + return (isSub) ? -Offset : Offset; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index f7a7eb5996..c0925fd146 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -155,37 +155,9 @@ namespace ARMII { I_BitShift = 25, CondShift = 28 }; - - /// ARMII::Op - Holds all of the instruction types required by - /// target specific instruction and register code. ARMBaseInstrInfo - /// and subclasses should return a specific opcode that implements - /// the instruction type. - /// - enum Op { - ADDri, - MOVr, - SUBri - }; -} - -static inline -const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { - return MIB.addImm((int64_t)ARMCC::AL).addReg(0); -} - -static inline -const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); -} - -static inline -const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB) { - return MIB.addReg(ARM::CPSR); } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMSubtarget &STI; - protected: // Can be only subclassed. 
explicit ARMBaseInstrInfo(const ARMSubtarget &sti); @@ -194,9 +166,6 @@ public: // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; - // Return the opcode that implements 'Op', or 0 if no opcode - virtual unsigned getOpcode(ARMII::Op Op) const =0; - // Return true if the block does not fall through. virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0; @@ -286,22 +255,68 @@ public: const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const; -private: - bool isUncondBranchOpcode(int Opc) const { - return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; - } +}; - bool isCondBranchOpcode(int Opc) const { - return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; - } +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} - bool isJumpTableBranchOpcode(int Opc) const { - return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd || - Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; - } +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} - int getMatchingCondBranchOpcode(int Opc) const; -}; +static inline +const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB) { + return MIB.addReg(ARM::CPSR); +} + +static inline +bool isUncondBranchOpcode(int Opc) { + return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; } +static inline +bool isCondBranchOpcode(int Opc) { + return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; +} + +static inline +bool isJumpTableBranchOpcode(int Opc) { + return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd || + Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; +} + +int getMatchingCondBranchOpcode(int Opc); + +/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of +/// instructions to materializea destreg = basereg + immediate in ARM / Thumb2 
+/// code. +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII); + +void emitT2RegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII); + + +/// rewriteARMFrameIndex / rewriteT2FrameIndex - +/// Rewrite MI to access 'Offset' bytes from the FP. Return the offset that +/// could not be handled directly in MI. +int rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + const ARMBaseInstrInfo &TII); + +int rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + const ARMBaseInstrInfo &TII); + +} // End llvm namespace + #endif diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index dfad80b56e..43f7df16fa 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -137,11 +137,6 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? 
ARM::R7 : ARM::R11) { } -unsigned ARMBaseRegisterInfo:: -getOpcode(int Op) const { - return TII.getOpcode((ARMII::Op)Op); -} - const unsigned* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const unsigned CalleeSavedRegs[] = { @@ -881,17 +876,6 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, return 0; } -// FIXME: Dup in ARMBaseInstrInfo.cpp -static inline -const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { - return MIB.addImm((int64_t)ARMCC::AL).addReg(0); -} - -static inline -const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void ARMBaseRegisterInfo:: @@ -936,46 +920,21 @@ hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -/// emitARMRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in ARM code. -static -void emitARMRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, int NumBytes, - ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII, - DebugLoc dl) { - bool isSub = NumBytes < 0; - if (isSub) NumBytes = -NumBytes; - - while (NumBytes) { - unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); - unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); - assert(ThisVal && "Didn't extract field correctly"); - - // We will handle these bits from offset, clear them. - NumBytes &= ~ThisVal; - - assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); - - // Build the new ADD / SUB. - unsigned Opc = TII.getOpcode(isSub ? 
ARMII::SUBri : ARMII::ADDri); - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); - BaseReg = DestReg; - } -} - static void -emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const ARMBaseInstrInfo &TII, DebugLoc dl, +emitSPUpdate(bool isARM, + MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, const ARMBaseInstrInfo &TII, int NumBytes, ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII, dl); + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII); } + void ARMBaseRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -993,18 +952,24 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert(!AFI->isThumb1OnlyFunction() && + "This eliminateCallFramePseudoInstr does not suppor Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); + // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN? if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg); + emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg); } else { // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
unsigned PredReg = Old->getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg); + emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg); } } } @@ -1018,7 +983,7 @@ static unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, ARMFunctionInfo *AFI) { unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12; - assert (!AFI->isThumb1OnlyFunction()); + assert(!AFI->isThumb1OnlyFunction()); if (Reg == 0) // Try a already spilled CS register. Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters()); @@ -1026,134 +991,16 @@ unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, return Reg; } -int ARMBaseRegisterInfo:: -rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const -{ - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - bool isSub = false; - - // Memory operands in inline assembly always use AddrMode2. - if (Opcode == ARM::INLINEASM) - AddrMode = ARMII::AddrMode2; - - if (Opcode == ADDriOpc) { - Offset += MI.getOperand(FrameRegIdx+1).getImm(); - if (Offset == 0) { - // Turn it into a move. - MI.setDesc(TII.get(MOVOpc)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(FrameRegIdx+1); - return 0; - } else if (Offset < 0) { - Offset = -Offset; - isSub = true; - MI.setDesc(TII.get(SUBriOpc)); - } - - // Common case: small offset, fits into instruction. - if (ARM_AM::getSOImmVal(Offset) != -1) { - // Replace the FrameIndex with sp / fp - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); - return 0; - } - - // Otherwise, pull as much of the immedidate into this ADDri/SUBri - // as possible. 
- unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); - unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); - - // We will handle these bits from offset, clear them. - Offset &= ~ThisImmVal; - - // Get the properly encoded SOImmVal field. - assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && - "Bit extraction didn't work?"); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); - } else { - unsigned ImmIdx = 0; - int InstrOffs = 0; - unsigned NumBits = 0; - unsigned Scale = 1; - switch (AddrMode) { - case ARMII::AddrMode2: { - ImmIdx = FrameRegIdx+2; - InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 12; - break; - } - case ARMII::AddrMode3: { - ImmIdx = FrameRegIdx+2; - InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 8; - break; - } - case ARMII::AddrMode5: { - ImmIdx = FrameRegIdx+1; - InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 8; - Scale = 4; - break; - } - default: - llvm_unreachable("Unsupported addressing mode!"); - break; - } - - Offset += InstrOffs * Scale; - assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); - if (Offset < 0) { - Offset = -Offset; - isSub = true; - } - - // Attempt to fold address comp. if opcode has offset bits - if (NumBits > 0) { - // Common case: small offset, fits into instruction. 
- MachineOperand &ImmOp = MI.getOperand(ImmIdx); - int ImmedOffset = Offset / Scale; - unsigned Mask = (1 << NumBits) - 1; - if ((unsigned)Offset <= Mask * Scale) { - // Replace the FrameIndex with sp - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - return 0; - } - - // Otherwise, it didn't fit. Pull in what we can to simplify the immed. - ImmedOffset = ImmedOffset & Mask; - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask*Scale); - } - } - - return (isSub) ? -Offset : Offset; -} - -void ARMBaseRegisterInfo:: -eliminateFrameIndexImpl(MachineBasicBlock::iterator II, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - int SPAdj, RegScavenger *RS) const { +void +ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - DebugLoc dl = MI.getDebugLoc(); + assert(!AFI->isThumb1OnlyFunction() && + "This eliminateFrameIndex does not suppor Thumb1!"); while (!MI.getOperand(i).isFI()) { ++i; @@ -1180,7 +1027,12 @@ eliminateFrameIndexImpl(MachineBasicBlock::iterator II, } // modify MI as necessary to handle as much of 'Offset' as possible - Offset = rewriteFrameIndex(MI, i, MOVOpc,ADDriOpc,SUBriOpc, FrameReg, Offset); + if (!AFI->isThumbFunction()) + Offset = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII); + else { + assert(AFI->isThumb2Function()); + Offset = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); + } if (Offset == 0) return; @@ -1201,8 +1053,14 @@ eliminateFrameIndexImpl(MachineBasicBlock::iterator II, ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); unsigned PredReg = (PIdx == -1) ? 
0 : MI.getOperand(PIdx+1).getReg(); - emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, - Offset, Pred, PredReg, TII, dl); + if (!AFI->isThumbFunction()) + emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, + Offset, Pred, PredReg, TII); + else { + assert(AFI->isThumb2Function()); + emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, + Offset, Pred, PredReg, TII); + } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); } @@ -1249,6 +1107,9 @@ emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert(!AFI->isThumb1OnlyFunction() && + "This emitPrologue does not suppor Thumb1!"); + bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); @@ -1261,11 +1122,11 @@ emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); return; } @@ -1304,24 +1165,25 @@ emitPrologue(MachineFunction &MF) const { } // Build the new SUBri to adjust SP for integer callee-save spill area 1. - emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size); movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI); // Darwin ABI requires FP to point to the stack slot that contains the // previous FP. if (STI.isTargetDarwin() || hasFP(MF)) { + unsigned ADDriOpc = !AFI->isThumbFunction() ? 
ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(getOpcode(ARMII::ADDri)), FramePtr) + BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); AddDefaultCC(AddDefaultPred(MIB)); } // Build the new SUBri to adjust SP for integer callee-save spill area 2. - emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size); // Build the new SUBri to adjust SP for FP callee-save spill area. movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI); - emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize); // Determine starting offsets of spill areas. unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); @@ -1336,7 +1198,7 @@ emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Insert it after all the callee-save spills. movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI); - emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); } if (STI.isTargetELF() && hasFP(MF)) { @@ -1368,21 +1230,22 @@ static bool isCSRestore(MachineInstr *MI, void ARMBaseRegisterInfo:: emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(!STI.isThumb1Only() && - "This emitEpilogue should not be executed for Thumb1!"); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert(!AFI->isThumb1OnlyFunction() && + "This emitEpilogue does not suppor Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, 
TII, NumBytes); } else { // Unwind MBBI to point to first LDR / FLDD. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -1409,36 +1272,38 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getDPRCalleeSavedAreaSize() || AFI->getDPRCalleeSavedAreaOffset()|| hasFP(MF)) { - if (NumBytes) - BuildMI(MBB, MBBI, dl, TII.get(getOpcode(ARMII::SUBri)), ARM::SP) + if (NumBytes) { + unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri; + BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP) .addReg(FramePtr) .addImm(NumBytes) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(getOpcode(ARMII::MOVr)), ARM::SP) + } else { + // Thumb2 or ARM. + unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr; + BuildMI(MBB, MBBI, dl, TII.get(MOVrOpc), ARM::SP) .addReg(FramePtr) .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + } } - } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); - } + } else if (NumBytes) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Move SP to start of integer callee save spill area 2. movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize()); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); // Move SP to start of integer callee save spill area 1. movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size()); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size()); // Move SP to SP upon entry to the function. 
movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size()); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); } if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize); - + emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 59725fe56a..9165bbc883 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -126,28 +126,12 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Return the offset that - // could not be handled directly in MI. - virtual int - rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const; - - virtual void - eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const { - eliminateFrameIndexImpl(II, ARM::MOVr, ARM::ADDri, ARM::SUBri, SPAdj, RS); - } + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; -protected: - void - eliminateFrameIndexImpl(MachineBasicBlock::iterator II, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - int SPAdj, RegScavenger *RS = NULL) const; - private: unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index e1867298dd..c9d5ac1f2b 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -62,19 +62,6 @@ getUnindexedOpcode(unsigned Opc) const { return 0; } -unsigned ARMInstrInfo:: -getOpcode(ARMII::Op Op) 
const { - switch (Op) { - case ARMII::ADDri: return ARM::ADDri; - case ARMII::MOVr: return ARM::MOVr; - case ARMII::SUBri: return ARM::SUBri; - default: - break; - } - - return 0; -} - bool ARMInstrInfo:: BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { if (MBB.empty()) return false; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 3e9f0204fe..5d1678d685 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -32,9 +32,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return the opcode that implements 'Op', or 0 if no opcode - unsigned getOpcode(ARMII::Op Op) const; - // Return true if the block does not fall through. bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2e1c9cfdf9..1126be4422 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -428,21 +428,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p), "adr$p.w $dst, #${label}_${id:no_hash}", []>; -// ADD rd, sp, #so_imm -def t2ADDrSPi : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - "add.w $dst, $sp, $imm", - []>; - -// ADD rd, sp, #imm12 -def t2ADDrSPi12 : T2XI<(outs GPR:$dst), (ins GPR:$sp, i32imm:$imm), - "addw $dst, $sp, $imm", - []>; - -def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - "addw $dst, $sp, $rhs", - []>; - - //===----------------------------------------------------------------------===// // Load / store Instructions. 
// diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index ed31132500..2ca2686f1e 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -30,18 +30,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -unsigned Thumb1InstrInfo::getOpcode(ARMII::Op Op) const { - switch (Op) { - case ARMII::ADDri: return ARM::tADDi8; - case ARMII::MOVr: return ARM::tMOVr; - case ARMII::SUBri: return ARM::tSUBi8; - default: - break; - } - - return 0; -} - bool Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { if (MBB.empty()) return false; diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index e227ce17ad..646f4298e3 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return the opcode that implements 'Op', or 0 if no opcode - unsigned getOpcode(ARMII::Op Op) const; - // Return true if the block does not fall through. 
bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 4b26b4bfe4..bb39b2e6a5 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -388,8 +388,8 @@ static void removeOperands(MachineInstr &MI, unsigned i) { int Thumb1RegisterInfo:: rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const + unsigned FrameReg, int Offset, + unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const { // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo // version then can pull out Thumb1 specific parts here diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index ab01a264d3..05b6ae9cef 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -51,8 +51,8 @@ public: // rewrite MI to access 'Offset' bytes from the FP. Return the offset that // could not be handled directly in MI. 
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const; + unsigned FrameReg, int Offset, + unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index ede8ab454b..0c31820f76 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -13,6 +13,7 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -31,18 +32,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -unsigned Thumb2InstrInfo::getOpcode(ARMII::Op Op) const { - switch (Op) { - case ARMII::ADDri: return ARM::t2ADDri; - case ARMII::MOVr: return ARM::t2MOVr; - case ARMII::SUBri: return ARM::t2SUBri; - default: - break; - } - - return 0; -} - bool Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { if (MBB.empty()) return false; @@ -124,3 +113,319 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); } + + +void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + // If profitable, use a movw or movt to materialize the offset. + // FIXME: Use the scavenger to grab a scratch register. 
+ if (DestReg != ARM::SP && DestReg != BaseReg && + NumBytes >= 4096 && + ARM_AM::getT2SOImmVal(NumBytes) == -1) { + bool Fits = false; + if (NumBytes < 65536) { + // Use a movw to materialize the 16-bit constant. + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg) + .addImm(NumBytes) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + Fits = true; + } else if ((NumBytes & 0xffff) == 0) { + // Use a movt to materialize the 32-bit constant. + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg) + .addReg(DestReg) + .addImm(NumBytes >> 16) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + Fits = true; + } + + if (Fits) { + if (isSub) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addReg(DestReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) + .addReg(DestReg, RegState::Kill) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + } + return; + } + } + + while (NumBytes) { + unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + unsigned ThisVal = NumBytes; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else if (ThisVal < 4096) { + Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } + + // Build the new ADD / SUB. 
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +static unsigned +negativeOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRi12: return ARM::t2LDRi8; + case ARM::t2LDRHi12: return ARM::t2LDRHi8; + case ARM::t2LDRBi12: return ARM::t2LDRBi8; + case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; + case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; + case ARM::t2STRi12: return ARM::t2STRi8; + case ARM::t2STRBi12: return ARM::t2STRBi8; + case ARM::t2STRHi12: return ARM::t2STRHi8; + + case ARM::t2LDRi8: + case ARM::t2LDRHi8: + case ARM::t2LDRBi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRSBi8: + case ARM::t2STRi8: + case ARM::t2STRBi8: + case ARM::t2STRHi8: + return opcode; + + default: + break; + } + + return 0; +} + +static unsigned +positiveOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRi8: return ARM::t2LDRi12; + case ARM::t2LDRHi8: return ARM::t2LDRHi12; + case ARM::t2LDRBi8: return ARM::t2LDRBi12; + case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; + case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; + case ARM::t2STRi8: return ARM::t2STRi12; + case ARM::t2STRBi8: return ARM::t2STRBi12; + case ARM::t2STRHi8: return ARM::t2STRHi12; + + case ARM::t2LDRi12: + case ARM::t2LDRHi12: + case ARM::t2LDRBi12: + case ARM::t2LDRSHi12: + case ARM::t2LDRSBi12: + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + return opcode; + + default: + break; + } + + return 0; +} + +static unsigned +immediateOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRs: return ARM::t2LDRi12; + case ARM::t2LDRHs: return ARM::t2LDRHi12; + case ARM::t2LDRBs: return ARM::t2LDRBi12; + case ARM::t2LDRSHs: return ARM::t2LDRSHi12; + case ARM::t2LDRSBs: return ARM::t2LDRSBi12; + case ARM::t2STRs: return ARM::t2STRi12; + case ARM::t2STRBs: return ARM::t2STRBi12; + case ARM::t2STRHs: return ARM::t2STRHi12; + + case 
ARM::t2LDRi12: + case ARM::t2LDRHi12: + case ARM::t2LDRBi12: + case ARM::t2LDRSHi12: + case ARM::t2LDRSBi12: + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + case ARM::t2LDRi8: + case ARM::t2LDRHi8: + case ARM::t2LDRBi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRSBi8: + case ARM::t2STRi8: + case ARM::t2STRBi8: + case ARM::t2STRHi8: + return opcode; + + default: + break; + } + + return 0; +} + +int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + const ARMBaseInstrInfo &TII) { + unsigned Opcode = MI.getOpcode(); + unsigned NewOpc = Opcode; + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrModeT2_i12. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? + + if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::t2MOVr)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(FrameRegIdx+1); + return 0; + } + + if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::t2SUBri)); + } + + // Common case: small offset, fits into instruction. + if (ARM_AM::getT2SOImmVal(Offset) != -1) { + NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + if (NewOpc != Opcode) + MI.setDesc(TII.get(NewOpc)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + return 0; + } + // Another common case: imm12. + if (Offset < 4096) { + NewOpc = isSub ? 
ARM::t2SUBri12 : ARM::t2ADDri12; + if (NewOpc != Opcode) + MI.setDesc(TII.get(NewOpc)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + return 0; + } + + // Otherwise, extract 8 adjacent bits from the immediate into this + // t2ADDri/t2SUBri. + unsigned RotAmt = CountLeadingZeros_32(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 && + "Bit extraction didn't work?"); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); + } else { + // AddrModeT2_so cannot handle any offset. If there is no offset + // register then we change to an immediate version. + NewOpc = Opcode; + if (AddrMode == ARMII::AddrModeT2_so) { + unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); + if (OffsetReg != 0) { + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + return Offset; + } + + MI.RemoveOperand(FrameRegIdx+1); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0); + NewOpc = immediateOffsetOpcode(Opcode); + AddrMode = ARMII::AddrModeT2_i12; + } + + unsigned NumBits = 0; + unsigned Scale = 1; + if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) { + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign convert Opcode to the appropriate + // instruction + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + if (Offset < 0) { + NewOpc = negativeOffsetOpcode(Opcode); + NumBits = 8; + isSub = true; + Offset = -Offset; + } else { + NewOpc = positiveOffsetOpcode(Opcode); + NumBits = 12; + } + } else { + // VFP address modes. 
+ assert(AddrMode == ARMII::AddrMode5); + int InstrOffs=ARM_AM::getAM5Offset(MI.getOperand(FrameRegIdx+1).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(FrameRegIdx+1).getImm()) ==ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + Offset += InstrOffs * 4; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0) { + Offset = -Offset; + isSub = true; + } + } + + if (NewOpc != Opcode) + MI.setDesc(TII.get(NewOpc)); + + MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1); + + // Attempt to fold address computation + // Common case: small offset, fits into instruction. + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with fp/sp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + if (isSub) { + if (AddrMode == ARMII::AddrMode5) + // FIXME: Not consistent. + ImmedOffset |= 1 << NumBits; + else + ImmedOffset = -ImmedOffset; + } + ImmOp.ChangeToImmediate(ImmedOffset); + return 0; + } + + // Otherwise, offset doesn't fit. Pull in what we can to simplify + ImmedOffset = Offset & Mask; + if (isSub) { + if (AddrMode == ARMII::AddrMode5) + // FIXME: Not consistent. + ImmedOffset |= 1 << NumBits; + else + ImmedOffset = -ImmedOffset; + } + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + + return (isSub) ? -Offset : Offset; +} diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 44c3d8232a..f3688c0084 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return the opcode that implements 'Op', or 0 if no opcode - unsigned getOpcode(ARMII::Op Op) const; - // Return true if the block does not fall through. 
bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index ae2d21e90c..98be2041fd 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -60,218 +60,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0); } -static unsigned -negativeOffsetOpcode(unsigned opcode) -{ - switch (opcode) { - case ARM::t2LDRi12: return ARM::t2LDRi8; - case ARM::t2LDRHi12: return ARM::t2LDRHi8; - case ARM::t2LDRBi12: return ARM::t2LDRBi8; - case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; - case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; - case ARM::t2STRi12: return ARM::t2STRi8; - case ARM::t2STRBi12: return ARM::t2STRBi8; - case ARM::t2STRHi12: return ARM::t2STRHi8; - - case ARM::t2LDRi8: - case ARM::t2LDRHi8: - case ARM::t2LDRBi8: - case ARM::t2LDRSHi8: - case ARM::t2LDRSBi8: - case ARM::t2STRi8: - case ARM::t2STRBi8: - case ARM::t2STRHi8: - return opcode; - - default: - break; - } - - return 0; -} - -static unsigned -positiveOffsetOpcode(unsigned opcode) -{ - switch (opcode) { - case ARM::t2LDRi8: return ARM::t2LDRi12; - case ARM::t2LDRHi8: return ARM::t2LDRHi12; - case ARM::t2LDRBi8: return ARM::t2LDRBi12; - case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; - case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; - case ARM::t2STRi8: return ARM::t2STRi12; - case ARM::t2STRBi8: return ARM::t2STRBi12; - case ARM::t2STRHi8: return ARM::t2STRHi12; - - case ARM::t2LDRi12: - case ARM::t2LDRHi12: - case ARM::t2LDRBi12: - case ARM::t2LDRSHi12: - case ARM::t2LDRSBi12: - case ARM::t2STRi12: - case ARM::t2STRBi12: - case ARM::t2STRHi12: - return opcode; - - default: - break; - } - - return 0; -} - -static unsigned -immediateOffsetOpcode(unsigned opcode) -{ - switch (opcode) { - case ARM::t2LDRs: return ARM::t2LDRi12; - case ARM::t2LDRHs: return ARM::t2LDRHi12; - case ARM::t2LDRBs: return 
ARM::t2LDRBi12; - case ARM::t2LDRSHs: return ARM::t2LDRSHi12; - case ARM::t2LDRSBs: return ARM::t2LDRSBi12; - case ARM::t2STRs: return ARM::t2STRi12; - case ARM::t2STRBs: return ARM::t2STRBi12; - case ARM::t2STRHs: return ARM::t2STRHi12; - - case ARM::t2LDRi12: - case ARM::t2LDRHi12: - case ARM::t2LDRBi12: - case ARM::t2LDRSHi12: - case ARM::t2LDRSBi12: - case ARM::t2STRi12: - case ARM::t2STRBi12: - case ARM::t2STRHi12: - case ARM::t2LDRi8: - case ARM::t2LDRHi8: - case ARM::t2LDRBi8: - case ARM::t2LDRSHi8: - case ARM::t2LDRSBi8: - case ARM::t2STRi8: - case ARM::t2STRBi8: - case ARM::t2STRHi8: - return opcode; - - default: - break; - } - - return 0; -} - bool Thumb2RegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; } - -int Thumb2RegisterInfo:: -rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const -{ - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - bool isSub = false; - - // Memory operands in inline assembly always use AddrModeT2_i12 - if (Opcode == ARM::INLINEASM) - AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? - - if (Opcode == ADDriOpc) { - Offset += MI.getOperand(FrameRegIdx+1).getImm(); - if (Offset == 0) { - // Turn it into a move. - MI.setDesc(TII.get(MOVOpc)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(FrameRegIdx+1); - return 0; - } else if (Offset < 0) { - Offset = -Offset; - isSub = true; - MI.setDesc(TII.get(SUBriOpc)); - } - - // Common case: small offset, fits into instruction. 
- if (ARM_AM::getT2SOImmVal(Offset) != -1) { - // Replace the FrameIndex with sp / fp - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); - return 0; - } - - // Otherwise, extract 8 adjacent bits from the immediate into this - // t2ADDri/t2SUBri. - unsigned RotAmt = CountLeadingZeros_32(Offset); - if (RotAmt > 24) - RotAmt = 24; - unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt); - - // We will handle these bits from offset, clear them. - Offset &= ~ThisImmVal; - - assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 && - "Bit extraction didn't work?"); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); - } else { - // AddrModeT2_so cannot handle any offset. If there is no offset - // register then we change to an immediate version. - if (AddrMode == ARMII::AddrModeT2_so) { - unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); - if (OffsetReg != 0) { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - return Offset; - } - - MI.RemoveOperand(FrameRegIdx+1); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0); - Opcode = immediateOffsetOpcode(Opcode); - AddrMode = ARMII::AddrModeT2_i12; - } - - // Neon and FP address modes are handled by the base ARM version... 
- if ((AddrMode != ARMII::AddrModeT2_i8) && - (AddrMode != ARMII::AddrModeT2_i12)) { - return ARMBaseRegisterInfo::rewriteFrameIndex(MI, FrameRegIdx, - ARM::t2MOVr, ARM::t2ADDri, ARM::t2SUBri, FrameReg, Offset); - } - - unsigned NumBits = 0; - Offset += MI.getOperand(FrameRegIdx+1).getImm(); - - // i8 supports only negative, and i12 supports only positive, so - // based on Offset sign convert Opcode to the appropriate - // instruction - if (Offset < 0) { - Opcode = negativeOffsetOpcode(Opcode); - NumBits = 8; - isSub = true; - Offset = -Offset; - } - else { - Opcode = positiveOffsetOpcode(Opcode); - NumBits = 12; - } - - if (Opcode) { - MI.setDesc(TII.get(Opcode)); - MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1); - - // Attempt to fold address computation - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if ((unsigned)Offset <= Mask) { - // Replace the FrameIndex with fp/sp - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - ImmOp.ChangeToImmediate((isSub) ? -Offset : Offset); - return 0; - } - - // Otherwise, offset doesn't fit. Pull in what we can to simplify - unsigned ImmedOffset = Offset & Mask; - ImmOp.ChangeToImmediate((isSub) ? -ImmedOffset : ImmedOffset); - Offset &= ~Mask; - } - } - - return (isSub) ? -Offset : Offset; -} diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 72a1cd1c80..a63c60b73b 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -37,19 +37,6 @@ public: unsigned PredReg = 0) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; - - // rewrite MI to access 'Offset' bytes from the FP. Return the offset that - // could not be handled directly in MI. 
- virtual int - rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc, - unsigned FrameReg, int Offset) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const { - ARMBaseRegisterInfo::eliminateFrameIndexImpl(II, ARM::t2MOVr, ARM::t2ADDri, - ARM::t2SUBri, SPAdj, RS); - } }; } diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll new file mode 100644 index 0000000000..60604f020f --- /dev/null +++ b/test/CodeGen/Thumb2/large-stack.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s + +define void @test1() { +; CHECK: test1: +; CHECK: sub.w sp, sp, #256 + %tmp = alloca [ 64 x i32 ] , align 4 + ret void +} + +define void @test2() { +; CHECK: test2: +; CHECK: sub.w sp, sp, #4160 +; CHECK: sub.w sp, sp, #8 + %tmp = alloca [ 4168 x i8 ] , align 4 + ret void +} + +define i32 @test3() { +; CHECK: test3: +; CHECK: sub.w sp, sp, #805306368 +; CHECK: sub.w sp, sp, #16 + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %a = alloca [805306369 x i8], align 16 + store i32 0, i32* %tmp + %tmp1 = load i32* %tmp + ret i32 %tmp1 +} |