author     Derek Schuff <dschuff@chromium.org>   2012-08-21 17:32:13 -0700
committer  Derek Schuff <dschuff@chromium.org>   2012-08-21 17:32:13 -0700
commit     66f271497ed92ebb05c66f54616e512606a2e314 (patch)
tree       96d54cd64804ab7c9f2f52f680c3301aa789ce1d /lib/Target/ARM
parent     b62e9abf7dd9e39c95327914ce9dfe216386824a (diff)
parent     bc363931085587bac42a40653962a3e5acd1ffce (diff)
Merge up to r162331, git commit bc363931085587bac42a40653962a3e5acd1ffce
Diffstat (limited to 'lib/Target/ARM')
36 files changed, 1911 insertions, 1376 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 7af8b9d909..06ed89395f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -204,17 +204,17 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
                                  [HasV6T2Ops, FeatureVFP2,
                                   FeatureDSPThumb2]>;
 
 // V7a Processors.
-def : Processor<"cortex-a8", CortexA8Itineraries,
+def : ProcessorModel<"cortex-a8", CortexA8Model,
 // @LOCALMOD-BEGIN
 // TODO(pdox): Resolve this mismatch.
                     [ProcA8, HasV7Ops, FeatureDB]>;
 //  FeatureNEON, FeatureDSPThumb2, FeatureHasRAS]>;
 // @LOCALMOD-END
-def : Processor<"cortex-a9", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9", CortexA9Model,
                     [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
                      FeatureDSPThumb2, FeatureHasRAS]>;
-def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
                     [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
                      FeatureDSPThumb2, FeatureMP,
                      FeatureHasRAS]>;
@@ -228,7 +228,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops,
 def : ProcNoItin<"cortex-m4", [HasV7Ops,
                                FeatureThumb2, FeatureNoARM, FeatureDB,
                                FeatureHWDiv, FeatureDSPThumb2,
-                               FeatureT2XtPk, FeatureVFP2,
+                               FeatureT2XtPk, FeatureVFP4,
                                FeatureVFPOnlySP, FeatureMClass]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 76cd0c389d..95caf529d6 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -622,10 +622,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       return false;
     }
 
-    // These modifiers are not yet supported.
+    // This modifier is not yet supported.
     case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
-    case 'H': // The highest-numbered register of a pair.
       return true;
+    case 'H': { // The highest-numbered register of a pair.
+      const MachineOperand &MO = MI->getOperand(OpNum);
+      if (!MO.isReg())
+        return true;
+      const TargetRegisterClass &RC = ARM::GPRRegClass;
+      const MachineFunction &MF = *MI->getParent()->getParent();
+      const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+      unsigned RegIdx = TRI->getEncodingValue(MO.getReg());
+      RegIdx |= 1; //The odd register is also the higher-numbered one of a pair.
+
+      unsigned Reg = RC.getRegister(RegIdx);
+      O << ARMInstPrinter::getRegisterName(Reg);
+      return false;
+    }
     }
   }
 
@@ -1253,8 +1267,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     assert(SrcReg == ARM::SP &&
            "Only stack pointer as a source reg is supported");
     for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
-         i != NumOps; ++i)
-      RegList.push_back(MI->getOperand(i).getReg());
+         i != NumOps; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      // Actually, there should never be any impdef stuff here. Skip it
+      // temporary to workaround PR11902.
+      if (MO.isImplicit())
+        continue;
+      RegList.push_back(MO.getReg());
+    }
     break;
   case ARM::STR_PRE_IMM:
   case ARM::STR_PRE_REG:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 08e55429ce..aab8825444 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     } else
       llvm_unreachable("Unknown reg class!");
     break;
+  case 24:
+    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+      // Use aligned spills if the stack can be realigned.
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+                       .addFrameIndex(FI).addImm(16)
+                       .addReg(SrcReg, getKillRegState(isKill))
+                       .addMemOperand(MMO));
+      } else {
+        MachineInstrBuilder MIB =
+          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+                       .addFrameIndex(FI))
+                       .addMemOperand(MMO);
+        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+      }
+    } else
+      llvm_unreachable("Unknown reg class!");
+    break;
   case 32:
-    if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         // FIXME: It's possible to only store part of the QQ register if the
         // spilled def has a sub-register index.
@@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
     }
     break;
   case ARM::VST1q64:
+  case ARM::VST1d64TPseudo:
+  case ARM::VST1d64QPseudo:
     if (MI->getOperand(0).isFI() &&
         MI->getOperand(2).getSubReg() == 0) {
       FrameIndex = MI->getOperand(0).getIndex();
@@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     } else
       llvm_unreachable("Unknown reg class!");
     break;
-  case 32:
-    if (ARM::QQPRRegClass.hasSubClassEq(RC)) {
+  case 24:
+    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+                       .addFrameIndex(FI).addImm(16)
+                       .addMemOperand(MMO));
+      } else {
+        MachineInstrBuilder MIB =
+          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+                         .addFrameIndex(FI)
+                         .addMemOperand(MMO));
+        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+          MIB.addReg(DestReg, RegState::ImplicitDefine);
+      }
+    } else
+      llvm_unreachable("Unknown reg class!");
+    break;
+  case 32:
+    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
       if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
         AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                        .addFrameIndex(FI).addImm(16)
@@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
     }
     break;
   case ARM::VLD1q64:
+  case ARM::VLD1d64TPseudo:
+  case ARM::VLD1d64QPseudo:
     if (MI->getOperand(1).isFI() &&
         MI->getOperand(0).getSubReg() == 0) {
       FrameIndex = MI->getOperand(1).getIndex();
@@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
 }
 
+/// Identify instructions that can be folded into a MOVCC instruction, and
+/// return the corresponding opcode for the predicated pseudo-instruction.
+static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
+                                 const MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return 0;
+  if (!MRI.hasOneNonDBGUse(Reg))
+    return 0;
+  MI = MRI.getVRegDef(Reg);
+  if (!MI)
+    return 0;
+  // Check if MI has any non-dead defs or physreg uses. This also detects
+  // predicated instructions which will be reading CPSR.
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    // Reject frame index operands, PEI can't handle the predicated pseudos.
+    if (MO.isFI() || MO.isCPI() || MO.isJTI())
+      return 0;
+    if (!MO.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      return 0;
+    if (MO.isDef() && !MO.isDead())
+      return 0;
+  }
+  switch (MI->getOpcode()) {
+  default: return 0;
+  case ARM::ANDri: return ARM::ANDCCri;
+  case ARM::ANDrr: return ARM::ANDCCrr;
+  case ARM::ANDrsi: return ARM::ANDCCrsi;
+  case ARM::ANDrsr: return ARM::ANDCCrsr;
+  case ARM::t2ANDri: return ARM::t2ANDCCri;
+  case ARM::t2ANDrr: return ARM::t2ANDCCrr;
+  case ARM::t2ANDrs: return ARM::t2ANDCCrs;
+  case ARM::EORri: return ARM::EORCCri;
+  case ARM::EORrr: return ARM::EORCCrr;
+  case ARM::EORrsi: return ARM::EORCCrsi;
+  case ARM::EORrsr: return ARM::EORCCrsr;
+  case ARM::t2EORri: return ARM::t2EORCCri;
+  case ARM::t2EORrr: return ARM::t2EORCCrr;
+  case ARM::t2EORrs: return ARM::t2EORCCrs;
+  case ARM::ORRri: return ARM::ORRCCri;
+  case ARM::ORRrr: return ARM::ORRCCrr;
+  case ARM::ORRrsi: return ARM::ORRCCrsi;
+  case ARM::ORRrsr: return ARM::ORRCCrsr;
+  case ARM::t2ORRri: return ARM::t2ORRCCri;
+  case ARM::t2ORRrr: return ARM::t2ORRCCrr;
+  case ARM::t2ORRrs: return ARM::t2ORRCCrs;
+
+  // ARM ADD/SUB
+  case ARM::ADDri: return ARM::ADDCCri;
+  case ARM::ADDrr: return ARM::ADDCCrr;
+  case ARM::ADDrsi: return ARM::ADDCCrsi;
+  case ARM::ADDrsr: return ARM::ADDCCrsr;
+  case ARM::SUBri: return ARM::SUBCCri;
+  case ARM::SUBrr: return ARM::SUBCCrr;
+  case ARM::SUBrsi: return ARM::SUBCCrsi;
+  case ARM::SUBrsr: return ARM::SUBCCrsr;
+
+  // Thumb2 ADD/SUB
+  case ARM::t2ADDri: return ARM::t2ADDCCri;
+  case ARM::t2ADDri12: return ARM::t2ADDCCri12;
+  case ARM::t2ADDrr: return ARM::t2ADDCCrr;
+  case ARM::t2ADDrs: return ARM::t2ADDCCrs;
+  case ARM::t2SUBri: return ARM::t2SUBCCri;
+  case ARM::t2SUBri12: return ARM::t2SUBCCri12;
+  case ARM::t2SUBrr: return ARM::t2SUBCCrr;
+  case ARM::t2SUBrs: return ARM::t2SUBCCrs;
+  }
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+         "Unknown select instruction");
+  // MOVCC operands:
+  // 0: Def.
+  // 1: True use.
+  // 2: False use.
+  // 3: Condition code.
+  // 4: CPSR use.
+  TrueOp = 1;
+  FalseOp = 2;
+  Cond.push_back(MI->getOperand(3));
+  Cond.push_back(MI->getOperand(4));
+  // We can always fold a def.
+  Optimizable = true;
+  return false;
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+                                               bool PreferFalse) const {
+  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+         "Unknown select instruction");
+  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineInstr *DefMI = 0;
+  unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
+  bool Invert = !Opc;
+  if (!Opc)
+    Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
+  if (!Opc)
+    return 0;
+
+  // Create a new predicated version of DefMI.
+  // Rfalse is the first use.
+  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                      get(Opc), MI->getOperand(0).getReg())
+    .addOperand(MI->getOperand(Invert ? 2 : 1));
+
+  // Copy all the DefMI operands, excluding its (null) predicate.
+  const MCInstrDesc &DefDesc = DefMI->getDesc();
+  for (unsigned i = 1, e = DefDesc.getNumOperands();
+       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+    NewMI.addOperand(DefMI->getOperand(i));
+
+  unsigned CondCode = MI->getOperand(3).getImm();
+  if (Invert)
+    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+  else
+    NewMI.addImm(CondCode);
+  NewMI.addOperand(MI->getOperand(4));
+
+  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+  if (NewMI->hasOptionalDef())
+    AddDefaultCC(NewMI);
+
+  // The caller will erase MI, but not DefMI.
+  DefMI->eraseFromParent();
+  return NewMI;
+}
+
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
 /// def operand.
@@ -1975,10 +2152,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   case ARM::t2EORri: {
     // Scan forward for the use of CPSR
     // When checking against MI: if it's a conditional code requires
-    // checking of V bit, then this is not safe to do. If we can't find the
-    // CPSR use (i.e. used in another block), then it's not safe to perform
-    // the optimization.
-    // When checking against Sub, we handle the condition codes GE, LT, GT, LE.
+    // checking of V bit, then this is not safe to do.
+    // It is safe to remove CmpInstr if CPSR is redefined or killed.
+    // If we are done with the basic block, we need to check whether CPSR is
+    // live-out.
     SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
         OperandsToUpdate;
     bool isSafe = false;
@@ -2018,7 +2195,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
         else
           switch (CC) {
           default:
-            isSafe = true;
+            // CPSR can be used multiple times, we should continue.
             break;
           case ARMCC::VS:
           case ARMCC::VC:
@@ -2031,10 +2208,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
       }
     }
 
-    // If the candidate is Sub, we may exit the loop at end of the basic block.
-    // In that case, it is still safe to remove CmpInstr.
-    if (!isSafe && !Sub)
-      return false;
+    // If CPSR is not killed nor re-defined, we should check whether it is
+    // live-out. If it is live-out, do not optimize.
+    if (!isSafe) {
+      MachineBasicBlock *MBB = CmpInstr->getParent();
+      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+               SE = MBB->succ_end(); SI != SE; ++SI)
+        if ((*SI)->isLiveIn(ARM::CPSR))
+          return false;
+    }
 
     // Toggle the optional operand to CPSR.
     MI->getOperand(5).setReg(ARM::CPSR);
@@ -2195,7 +2377,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   //
   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
   // same cycle. The scheduling for the first load / store must be done
-  // separately by assuming the the address is not 64-bit aligned.
+  // separately by assuming the address is not 64-bit aligned.
   //
   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
   // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
@@ -3177,11 +3359,18 @@ enum ARMExeDomain {
 //
 std::pair<uint16_t, uint16_t>
 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
-  // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
-  // predicated.
+  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+  // if they are not predicated.
   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
 
+  // Cortex-A9 is particularly picky about mixing the two and wants these
+  // converted.
+  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+      (MI->getOpcode() == ARM::VMOVRS ||
+       MI->getOpcode() == ARM::VMOVSR))
+    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
   // No other instructions can be swizzled, so just determine their domain.
   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
 
@@ -3201,22 +3390,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
 void
 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
-  // We only know how to change VMOVD into VORR.
-  assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
-  if (Domain != ExeNEON)
-    return;
+  unsigned DstReg, SrcReg, DReg;
+  unsigned Lane;
+  MachineInstrBuilder MIB(MI);
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool isKill;
+  switch (MI->getOpcode()) {
+    default:
+      llvm_unreachable("cannot handle opcode!");
+      break;
+    case ARM::VMOVD:
+      if (Domain != ExeNEON)
+        break;
 
-  // Zap the predicate operands.
-  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
-  MI->RemoveOperand(3);
-  MI->RemoveOperand(2);
+      // Zap the predicate operands.
+      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
 
-  // Change to a VORRd which requires two identical use operands.
-  MI->setDesc(get(ARM::VORRd));
+      // Change to a VORRd which requires two identical use operands.
+      MI->setDesc(get(ARM::VORRd));
+
+      // Add the extra source operand and new predicates.
+      // This will go before any implicit ops.
+      AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+      break;
+    case ARM::VMOVRS:
+      if (Domain != ExeNEON)
+        break;
+      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+      DstReg = MI->getOperand(0).getReg();
+      SrcReg = MI->getOperand(1).getReg();
+
+      DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
+      Lane = 0;
+      if (DReg == ARM::NoRegister) {
+        DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
+        Lane = 1;
+        assert(DReg && "S-register with no D super-register?");
+      }
+
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
+      MI->RemoveOperand(1);
+
+      MI->setDesc(get(ARM::VGETLNi32));
+      MIB.addReg(DReg);
+      MIB.addImm(Lane);
+
+      MIB->getOperand(1).setIsUndef();
+      MIB.addReg(SrcReg, RegState::Implicit);
+
+      AddDefaultPred(MIB);
+      break;
+    case ARM::VMOVSR:
+      if (Domain != ExeNEON)
+        break;
+      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+      DstReg = MI->getOperand(0).getReg();
+      SrcReg = MI->getOperand(1).getReg();
+      DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
+      Lane = 0;
+      if (DReg == ARM::NoRegister) {
+        DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
+        Lane = 1;
+        assert(DReg && "S-register with no D super-register?");
+      }
+      isKill = MI->getOperand(0).isKill();
+
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
+      MI->RemoveOperand(1);
+      MI->RemoveOperand(0);
+
+      MI->setDesc(get(ARM::VSETLNi32));
+      MIB.addReg(DReg, RegState::Define);
+      MIB.addReg(DReg, RegState::Undef);
+      MIB.addReg(SrcReg);
+      MIB.addImm(Lane);
+
+      if (isKill)
+        MIB->addRegisterKilled(DstReg, TRI, true);
+      MIB->addRegisterDefined(DstReg, TRI);
+
+      AddDefaultPred(MIB);
+      break;
+  }
 
-  // Add the extra source operand and new predicates.
-  // This will go before any implicit ops.
-  AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
 }
 
 bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a10a4ab1c..92e5ee8dcb 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -202,6 +202,13 @@ public:
                                     unsigned SrcReg2, int CmpMask, int CmpValue,
                                     const MachineRegisterInfo *MRI) const;
 
+  virtual bool analyzeSelect(const MachineInstr *MI,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             unsigned &TrueOp, unsigned &FalseOp,
+                             bool &Optimizable) const;
+
+  virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
 
 int getMatchingCondBranchOpcode(int Opc);
 
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+                                  MachineInstr *&MI,
+                                  const MachineRegisterInfo &MRI);
 
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
 /// the instruction is encoded with an 'S' bit is determined by the optional
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 533ed8834b..b2ba9703b6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -65,8 +65,20 @@ extern cl::opt<bool> ReserveR9; // @LOCALMOD
 const uint16_t*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (ReserveR9) return CSR_NaCl_SaveList; // @LOCALMOD
+  bool ghcCall = false;
+
+  if (MF) {
+    const Function *F = MF->getFunction();
+    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
+  }
+
+  if (ghcCall) {
+    return CSR_GHC_SaveList;
+  }
+  else {
   return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
     ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+  }
 }
 
 const uint32_t*
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 1a79d95f95..e0f3a871ba 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[
   CCDelegateTo<RetCC_ARM_APCS>
 ]>;
 
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for GHC
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_APCS_GHC : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
+  CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
+  CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
+  CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
+]>;
 
 //===----------------------------------------------------------------------===//
 // ARM AAPCS (EABI) Calling Convention, common parts
@@ -117,6 +136,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
 //===----------------------------------------------------------------------===//
 
 def CC_ARM_AAPCS : CallingConv<[
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -142,6 +164,9 @@ def RetCC_ARM_AAPCS : CallingConv<[
 //===----------------------------------------------------------------------===//
 
 def CC_ARM_AAPCS_VFP : CallingConv<[
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
@@ -176,6 +201,12 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
 // Also save R7-R4 first to match the stack frame fixed spill areas.
 def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
 
+// GHC set of callee saved regs is empty as all those regs are
+// used for passing STG regs around
+// add is a workaround for not being able to compile empty list:
+// def CSR_GHC : CalleeSavedRegs<()>;
+def CSR_GHC : CalleeSavedRegs<(add)>;
+
 // @LOCALMOD-START
 // NaCl does not save R9, but otherwise uses the same order as AAPCS
 def CSR_NaCl : CalleeSavedRegs<(add LR, R11, R10, R8, R7, R6, R5, R4,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index fc445a9339..e81b4cc282 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -254,7 +254,7 @@ namespace {
         emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
         return 0;
       }
-      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
       int32_t Imm12 = MO1.getImm();
       uint32_t Binary;
       Binary = Imm12 & 0xfff;
@@ -296,7 +296,7 @@ namespace {
        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
         return 0;
       }
-      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg());
       int32_t Imm12 = MO1.getImm();
 
       // Special value for #-0
@@ -352,6 +352,12 @@ namespace {
     void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
     void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
                                intptr_
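Editor's note on the MOVCC folding introduced in ARMBaseInstrInfo.cpp above: optimizeSelect first tries to fold the instruction defining the select's second register use; only when that fails does it fold the first use's definition, and in that case the new predicated instruction takes the opposite condition code. The following is a minimal standalone C++ sketch of just that operand/condition choice; canFold and foldMOVCC are hypothetical stand-ins, not LLVM APIs.

    #include <iostream>
    #include <string>

    // Stand-in for canFoldIntoMOVCC(): returns the predicated opcode name for
    // a foldable defining instruction, or "" when the def cannot be predicated
    // (in the real pass: physreg operands, extra live defs, frame indices, ...).
    static std::string canFold(const std::string &DefOpcode) {
      if (DefOpcode == "ADDrr") return "ADDCCrr";
      if (DefOpcode == "SUBri") return "SUBCCri";
      return "";
    }

    // Mirrors the Invert logic of optimizeSelect(): try operand 2's def first;
    // if only operand 1's def folds, emit it under the opposite condition code.
    static std::string foldMOVCC(const std::string &Def1, const std::string &Def2,
                                 const std::string &CC, const std::string &OppCC) {
      std::string Opc = canFold(Def2);
      bool Invert = Opc.empty();
      if (Invert)
        Opc = canFold(Def1);
      if (Opc.empty())
        return "MOVCC " + CC;            // nothing foldable, keep the select
      return Opc + " " + (Invert ? OppCC : CC);
    }

    int main() {
      std::cout << foldMOVCC("MOVi", "ADDrr", "eq", "ne") << "\n"; // ADDCCrr eq
      std::cout << foldMOVCC("SUBri", "MOVi", "eq", "ne") << "\n"; // SUBCCri ne
      std::cout << foldMOVCC("MOVi", "MOVi", "eq", "ne") << "\n";  // MOVCC eq
    }

The real canFoldIntoMOVCC shown above additionally requires the folded register to be virtual with a single non-debug use, which keeps the transformation safe on SSA-form machine code.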