diff options
Diffstat (limited to 'lib/Target/ARM')
22 files changed, 432 insertions, 349 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 10d7f56c7f..7af8b9d909 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, GenericItineraries, Features>; + : Processor<Name, NoItineraries, Features>; // V4 Processors. def : ProcNoItin<"generic", []>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 967c0a8462..76cd0c389d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,8 +23,8 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" @@ -515,7 +515,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); case 'a': // Print as a memory address. if (MI->getOperand(OpNum).isReg()) { O << "[" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0811d226b4..08e55429ce 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1738,26 +1738,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } +/// analyzeCompare - For a comparison instruction, return the source registers +/// in SrcReg and SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. bool ARMBaseInstrInfo:: -AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, - int &CmpValue) const { +analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const { switch (MI->getOpcode()) { default: break; case ARM::CMPri: case ARM::t2CMPri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; case ARM::CMPrr: case ARM::t2CMPrr: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = MI->getOperand(1).getReg(); CmpMask = ~0; CmpValue = 0; return true; case ARM::TSTri: case ARM::t2TSTri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = MI->getOperand(1).getImm(); CmpValue = 0; return true; @@ -1795,21 +1802,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, return false; } -/// OptimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. Convert -/// the SUBrr(r1,r2)|Subri(r1,CmpValue) instruction into one that sets the flags -/// register and remove the CMPrr(r1,r2)|CMPrr(r2,r1)|CMPri(r1,CmpValue) -/// instruction. -bool ARMBaseInstrInfo:: -OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). +inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::EQ: return ARMCC::EQ; + case ARMCC::NE: return ARMCC::NE; + case ARMCC::HS: return ARMCC::LS; + case ARMCC::LO: return ARMCC::HI; + case ARMCC::HI: return ARMCC::LO; + case ARMCC::LS: return ARMCC::HS; + case ARMCC::GE: return ARMCC::LE; + case ARMCC::LT: return ARMCC::GT; + case ARMCC::GT: return ARMCC::LT; + case ARMCC::LE: return ARMCC::GE; + } +} + +/// isRedundantFlagInstr - check whether the first instruction, whose only +/// purpose is to update flags, can be made redundant. +/// CMPrr can be made redundant by SUBrr if the operands are the same. +/// CMPri can be made redundant by SUBri if the operands are the same. +/// This function can be extended later on. +inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, + unsigned SrcReg2, int ImmValue, + MachineInstr *OI) { + if ((CmpI->getOpcode() == ARM::CMPrr || + CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::SUBrr || + OI->getOpcode() == ARM::t2SUBrr) && + ((OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getReg() == SrcReg2) || + (OI->getOperand(1).getReg() == SrcReg2 && + OI->getOperand(2).getReg() == SrcReg))) + return true; - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. - return false; + if ((CmpI->getOpcode() == ARM::CMPri || + CmpI->getOpcode() == ARM::t2CMPri) && + (OI->getOpcode() == ARM::SUBri || + OI->getOpcode() == ARM::t2SUBri) && + OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getImm() == ImmValue) + return true; + return false; +} - MachineInstr *MI = &*DI; +/// optimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register; +/// Remove a redundant Compare instruction if an earlier instruction can set the +/// flags in the same way as Compare. +/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two +/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the +/// condition code of instructions which use the flags. +bool ARMBaseInstrInfo:: +optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const { + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { @@ -1840,13 +1893,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). MachineInstr *Sub = NULL; - unsigned SrcReg2 = 0; - if (CmpInstr->getOpcode() == ARM::CMPrr || - CmpInstr->getOpcode() == ARM::t2CMPrr) { - SrcReg2 = CmpInstr->getOperand(1).getReg(); + if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = NULL; - } else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { + else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. @@ -1859,40 +1909,19 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. At the same time, search for Sub. + const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) - return false; - if (!MO.isReg()) continue; - + if (Instr.modifiesRegister(ARM::CPSR, TRI) || + Instr.readsRegister(ARM::CPSR, TRI)) // This instruction modifies or uses CPSR after the one we want to // change. We can't do this transformation. - if (MO.getReg() == ARM::CPSR) - return false; - } - - // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1). - if (SrcReg2 != 0 && - (Instr.getOpcode() == ARM::SUBrr || - Instr.getOpcode() == ARM::t2SUBrr) && - ((Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getReg() == SrcReg2) || - (Instr.getOperand(1).getReg() == SrcReg2 && - Instr.getOperand(2).getReg() == SrcReg))) { - Sub = &*I; - break; - } + return false; - // Check whether the current instruction is SUBri(r1, CmpValue). - if ((CmpInstr->getOpcode() == ARM::CMPri || - CmpInstr->getOpcode() == ARM::t2CMPri) && - Instr.getOpcode() == ARM::SUBri && CmpValue != 0 && - Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getImm() == CmpValue) { + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { Sub = &*I; break; } @@ -1950,7 +1979,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // CPSR use (i.e. used in another block), then it's not safe to perform // the optimization. // When checking against Sub, we handle the condition codes GE, LT, GT, LE. - SmallVector<MachineOperand*, 4> OperandsToUpdate; + SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> + OperandsToUpdate; bool isSafe = false; I = CmpInstr; E = CmpInstr->getParent()->end(); @@ -1971,30 +2001,20 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, } // Condition code is after the operand before CPSR. ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); - if (Sub) - switch (CC) { - default: + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) return false; - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: - case ARMCC::HS: - case ARMCC::LS: - case ARMCC::HI: - case ARMCC::LO: - case ARMCC::EQ: - case ARMCC::NE: - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based - // on CMP needs to be updated to be based on SUB. - // Push the condition code operands to OperandsToUpdate. - // If it is safe to remove CmpInstr, the condition code of these - // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(&((*I).getOperand(IO-1))); - break; - } + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. + if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) + OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), + NewCC)); + } else switch (CC) { default: @@ -2024,26 +2044,9 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Modify the condition code of operands in OperandsToUpdate. // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. - for (unsigned i = 0; i < OperandsToUpdate.size(); i++) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)OperandsToUpdate[i]->getImm(); - ARMCC::CondCodes NewCC; - switch (CC) { - default: llvm_unreachable("only expecting less/greater comparisons here"); - case ARMCC::GE: NewCC = ARMCC::LE; break; - case ARMCC::LT: NewCC = ARMCC::GT; break; - case ARMCC::GT: NewCC = ARMCC::LT; break; - case ARMCC::LE: NewCC = ARMCC::GE; break; - case ARMCC::HS: NewCC = ARMCC::LS; break; - case ARMCC::LS: NewCC = ARMCC::HS; break; - case ARMCC::HI: NewCC = ARMCC::LO; break; - case ARMCC::LO: NewCC = ARMCC::HI; break; - case ARMCC::EQ: - case ARMCC::NE: - NewCC = CC; - break; - } - OperandsToUpdate[i]->setImm(NewCC); - } + for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + return true; } } @@ -2175,9 +2178,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; + int ItinUOps = ItinData->getNumMicroOps(Class); + if (ItinUOps >= 0) + return ItinUOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2251,19 +2254,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - UOps = (NumRegs / 2); + int A8UOps = (NumRegs / 2); if (NumRegs % 2) - ++UOps; - return UOps; + ++A8UOps; + return A8UOps; } else if (Subtarget.isCortexA9()) { - UOps = (NumRegs / 2); + int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++UOps; - return UOps; + ++A9UOps; + return A9UOps; } else { // Assume the worst. return NumRegs; @@ -2763,11 +2766,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned NewUseIdx; const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, Reg, NewUseIdx, UseAdj); - if (NewUseMI) { - UseMI = NewUseMI; - UseIdx = NewUseIdx; - UseMCID = &UseMI->getDesc(); - } + if (!NewUseMI) + return -1; + + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); } if (Reg == ARM::CPSR) { @@ -2795,6 +2799,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } + if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) + return -1; + unsigned DefAlign = DefMI->hasOneMemOperand() ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() @@ -3015,9 +3022,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, return 1; // If the second MI is predicated, then there is an implicit use dependency. - int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI, - DepMI->getNumOperands()); - return (Latency <= 0) ? 1 : Latency; + return getInstrLatency(ItinData, DefMI); } unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, @@ -3054,9 +3059,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, unsigned Class = MCID.getSchedClass(); // For instructions with variable uops, use uops as latency. - if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) { + if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) return getNumMicroOps(ItinData, MI); - } + // For the common case, fall back on the itinerary's latency. unsigned Latency = ItinData->getStageLatency(Class); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 8217f239d1..1a10a4ab1c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -186,16 +186,20 @@ public: return NumCycles == 1; } - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - int &CmpMask, int &CmpValue) const; - - /// OptimizeCompareInstr - Convert the instruction to set the zero flag so - /// that we can remove a "comparison with zero". - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, - int CmpMask, int CmpValue, + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const; + + /// optimizeCompareInstr - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero"; Remove a redundant CMP + /// instruction if the flags can be updated in the same way by an earlier + /// instruction such as SUB. + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; /// FoldImmediate - 'Reg' is known to be defined by a move immediate diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c386a01e89..3650e1fb77 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -471,22 +471,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0); + MIB.addReg(D0, getUndefRegState(SrcIsUndef)); if (NumRegs > 1 && TableEntry->copyAllListRegs) - MIB.addReg(D1); + MIB.addReg(D1, getUndefRegState(SrcIsUndef)); if (NumRegs > 2 && TableEntry->copyAllListRegs) - MIB.addReg(D2); + MIB.addReg(D2, getUndefRegState(SrcIsUndef)); if (NumRegs > 3 && TableEntry->copyAllListRegs) - MIB.addReg(D3); + MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) // Add an implicit kill for the super-reg. + if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); // Transfer memoperands. diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2158b7e028..ff660210ea 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -177,7 +177,6 @@ class ARMFastISel : public FastISel { bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1361,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); - return true; + return true; } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -1740,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { // type and the target independent selector doesn't know how to handle it. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; - + unsigned Opc; switch (ISDOpcode) { default: return false; @@ -2146,7 +2145,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; // Can't handle non-double multi-reg retvals. - if (RetVT != MVT::isVoid && RetVT != MVT::i32) { + if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); @@ -2352,7 +2351,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addReg(CalleeReg); else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); - else + else MIB.addExternalSymbol(IntrMemName, 0); } else { if (UseReg) @@ -2365,7 +2364,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Explicitly adding the predicate here. AddDefaultPred(MIB); } - + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2486,10 +2485,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return true; } } - + if (!MTI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) return false; @@ -2501,13 +2500,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // Don't handle volatile. if (MSI.isVolatile()) return false; - + if (!MSI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MSI.getDestAddressSpace() > 255) return false; - + return SelectCall(&I, "memset"); } case Intrinsic::trap: { @@ -2518,7 +2517,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { } bool ARMFastISel::SelectTrunc(const Instruction *I) { - // The high bits for a type smaller than the register size are assumed to be + // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); @@ -2709,7 +2708,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, // See if we can handle this address. Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - + unsigned ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e1c89e0c42..238b79e1f1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6561,11 +6561,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize; + unsigned ldrOpc, strOpc, UnitSize = 0; const TargetRegisterClass *TRC = isThumb2 ? (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC_Vec = 0; if (Align & 1) { ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; @@ -6576,10 +6577,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; - UnitSize = 4; + // Check whether we can use NEON instructions. + if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if ((Align % 16 == 0) && SizeVal >= 16) { + ldrOpc = ARM::VLD1q32wb_fixed; + strOpc = ARM::VST1q32wb_fixed; + UnitSize = 16; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; + } + else if ((Align % 8 == 0) && SizeVal >= 8) { + ldrOpc = ARM::VLD1d32wb_fixed; + strOpc = ARM::VST1d32wb_fixed; + UnitSize = 8; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; + } + } + // Can't use NEON instructions. + if (UnitSize == 0) { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } } + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6590,10 +6611,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(destIn).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch) .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); @@ -6739,8 +6767,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(TRC); - if (isThumb2) { + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(destPhi).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); @@ -7113,9 +7147,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); @@ -7132,12 +7163,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); - // insert a movs at the end of BB - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), - NewMovDstReg) - .addReg(ABSSrcReg, RegState::Kill) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::CPSR, RegState::Define); + // insert a cmp at the end of BB + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg).addImm(0)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -7149,7 +7178,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(NewMovDstReg, RegState::Kill) + .addReg(ABSSrcReg, RegState::Kill) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); // insert PHI in SinkBB, @@ -7157,7 +7186,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) - .addReg(NewMovDstReg).addMBB(BB); + .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI->eraseFromParent(); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index b8f607eb4c..31b0c41f08 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) /// getNoopForMachoTarget - Return the noop instruction to use for a noop. void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { if (hasNOP()) { - NopInst.setOpcode(ARM::NOP); + NopInst.setOpcode(ARM::HINT); + NopInst.addOperand(MCOperand::CreateImm(0)); NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); NopInst.addOperand(MCOperand::CreateReg(0)); } else { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 611d9194fd..6a14871bb0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -244,7 +244,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. -def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " |