Diffstat (limited to 'lib/Target')
126 files changed, 2944 insertions, 1597 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 10d7f56c7f..7af8b9d909 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> - : Processor<Name, GenericItineraries, Features>; + : Processor<Name, NoItineraries, Features>; // V4 Processors. def : ProcNoItin<"generic", []>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 967c0a8462..76cd0c389d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,8 +23,8 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" @@ -515,7 +515,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); case 'a': // Print as a memory address. if (MI->getOperand(OpNum).isReg()) { O << "[" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0811d226b4..08e55429ce 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1738,26 +1738,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } +/// analyzeCompare - For a comparison instruction, return the source registers +/// in SrcReg and SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. bool ARMBaseInstrInfo:: -AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, - int &CmpValue) const { +analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const { switch (MI->getOpcode()) { default: break; case ARM::CMPri: case ARM::t2CMPri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; case ARM::CMPrr: case ARM::t2CMPrr: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = MI->getOperand(1).getReg(); CmpMask = ~0; CmpValue = 0; return true; case ARM::TSTri: case ARM::t2TSTri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = MI->getOperand(1).getImm(); CmpValue = 0; return true; @@ -1795,21 +1802,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, return false; } -/// OptimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. Convert -/// the SUBrr(r1,r2)|Subri(r1,CmpValue) instruction into one that sets the flags -/// register and remove the CMPrr(r1,r2)|CMPrr(r2,r1)|CMPri(r1,CmpValue) -/// instruction. -bool ARMBaseInstrInfo:: -OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). 
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::EQ: return ARMCC::EQ; + case ARMCC::NE: return ARMCC::NE; + case ARMCC::HS: return ARMCC::LS; + case ARMCC::LO: return ARMCC::HI; + case ARMCC::HI: return ARMCC::LO; + case ARMCC::LS: return ARMCC::HS; + case ARMCC::GE: return ARMCC::LE; + case ARMCC::LT: return ARMCC::GT; + case ARMCC::GT: return ARMCC::LT; + case ARMCC::LE: return ARMCC::GE; + } +} + +/// isRedundantFlagInstr - check whether the first instruction, whose only +/// purpose is to update flags, can be made redundant. +/// CMPrr can be made redundant by SUBrr if the operands are the same. +/// CMPri can be made redundant by SUBri if the operands are the same. +/// This function can be extended later on. +inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, + unsigned SrcReg2, int ImmValue, + MachineInstr *OI) { + if ((CmpI->getOpcode() == ARM::CMPrr || + CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::SUBrr || + OI->getOpcode() == ARM::t2SUBrr) && + ((OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getReg() == SrcReg2) || + (OI->getOperand(1).getReg() == SrcReg2 && + OI->getOperand(2).getReg() == SrcReg))) + return true; - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. - return false; + if ((CmpI->getOpcode() == ARM::CMPri || + CmpI->getOpcode() == ARM::t2CMPri) && + (OI->getOpcode() == ARM::SUBri || + OI->getOpcode() == ARM::t2SUBri) && + OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getImm() == ImmValue) + return true; + return false; +} - MachineInstr *MI = &*DI; +/// optimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register; +/// Remove a redundant Compare instruction if an earlier instruction can set the +/// flags in the same way as Compare. +/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two +/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the +/// condition code of instructions which use the flags. +bool ARMBaseInstrInfo:: +optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const { + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { @@ -1840,13 +1893,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). MachineInstr *Sub = NULL; - unsigned SrcReg2 = 0; - if (CmpInstr->getOpcode() == ARM::CMPrr || - CmpInstr->getOpcode() == ARM::t2CMPrr) { - SrcReg2 = CmpInstr->getOperand(1).getReg(); + if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = NULL; - } else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { + else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. 
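For context, the following sketch shows how the analyzeCompare/optimizeCompareInstr pair above is meant to be driven. This is a simplified, assumed model of what LLVM's target-independent peephole pass does; tryEliminateCmp is a hypothetical name, not part of the patch:

    static bool tryEliminateCmp(MachineInstr *CmpMI, const ARMBaseInstrInfo &TII,
                                const MachineRegisterInfo &MRI) {
      unsigned SrcReg = 0, SrcReg2 = 0;
      int CmpMask = 0, CmpValue = 0;
      // Decompose the compare; SrcReg2 stays 0 for the immediate forms.
      if (!TII.analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
        return false;
      // On success this erases CmpMI and, in the swapped-operand case,
      // rewrites the condition code of every CPSR user.
      return TII.optimizeCompareInstr(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue,
                                      &MRI);
    }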
@@ -1859,40 +1909,19 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. At the same time, search for Sub. + const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) - return false; - if (!MO.isReg()) continue; - + if (Instr.modifiesRegister(ARM::CPSR, TRI) || + Instr.readsRegister(ARM::CPSR, TRI)) // This instruction modifies or uses CPSR after the one we want to // change. We can't do this transformation. - if (MO.getReg() == ARM::CPSR) - return false; - } - - // Check whether the current instruction is SUB(r1, r2) or SUB(r2, r1). - if (SrcReg2 != 0 && - (Instr.getOpcode() == ARM::SUBrr || - Instr.getOpcode() == ARM::t2SUBrr) && - ((Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getReg() == SrcReg2) || - (Instr.getOperand(1).getReg() == SrcReg2 && - Instr.getOperand(2).getReg() == SrcReg))) { - Sub = &*I; - break; - } + return false; - // Check whether the current instruction is SUBri(r1, CmpValue). - if ((CmpInstr->getOpcode() == ARM::CMPri || - CmpInstr->getOpcode() == ARM::t2CMPri) && - Instr.getOpcode() == ARM::SUBri && CmpValue != 0 && - Instr.getOperand(1).getReg() == SrcReg && - Instr.getOperand(2).getImm() == CmpValue) { + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { Sub = &*I; break; } @@ -1950,7 +1979,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // CPSR use (i.e. used in another block), then it's not safe to perform // the optimization. // When checking against Sub, we handle the condition codes GE, LT, GT, LE. - SmallVector<MachineOperand*, 4> OperandsToUpdate; + SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> + OperandsToUpdate; bool isSafe = false; I = CmpInstr; E = CmpInstr->getParent()->end(); @@ -1971,30 +2001,20 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, } // Condition code is after the operand before CPSR. ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); - if (Sub) - switch (CC) { - default: + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) return false; - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: - case ARMCC::HS: - case ARMCC::LS: - case ARMCC::HI: - case ARMCC::LO: - case ARMCC::EQ: - case ARMCC::NE: - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based - // on CMP needs to be updated to be based on SUB. - // Push the condition code operands to OperandsToUpdate. - // If it is safe to remove CmpInstr, the condition code of these - // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(&((*I).getOperand(IO-1))); - break; - } + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. 
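To make the swapped-operand case concrete: if the flags end up coming from SUBS r0, r1, r2 while the erased compare was CMP r2, r1, a user that branched on GT (r2 > r1) must afterwards branch on LT (r1 < r2). In terms of the helper defined above (illustrative asserts only):

    assert(getSwappedCondition(ARMCC::GT) == ARMCC::LT); // r2 > r1 becomes r1 < r2
    assert(getSwappedCondition(ARMCC::EQ) == ARMCC::EQ); // symmetric codes unchanged
    assert(getSwappedCondition(ARMCC::VS) == ARMCC::AL); // AL doubles as "cannot swap"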
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) + OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), + NewCC)); + } else switch (CC) { default: @@ -2024,26 +2044,9 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Modify the condition code of operands in OperandsToUpdate. // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. - for (unsigned i = 0; i < OperandsToUpdate.size(); i++) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)OperandsToUpdate[i]->getImm(); - ARMCC::CondCodes NewCC; - switch (CC) { - default: llvm_unreachable("only expecting less/greater comparisons here"); - case ARMCC::GE: NewCC = ARMCC::LE; break; - case ARMCC::LT: NewCC = ARMCC::GT; break; - case ARMCC::GT: NewCC = ARMCC::LT; break; - case ARMCC::LE: NewCC = ARMCC::GE; break; - case ARMCC::HS: NewCC = ARMCC::LS; break; - case ARMCC::LS: NewCC = ARMCC::HS; break; - case ARMCC::HI: NewCC = ARMCC::LO; break; - case ARMCC::LO: NewCC = ARMCC::HI; break; - case ARMCC::EQ: - case ARMCC::NE: - NewCC = CC; - break; - } - OperandsToUpdate[i]->setImm(NewCC); - } + for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + return true; } } @@ -2175,9 +2178,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; + int ItinUOps = ItinData->getNumMicroOps(Class); + if (ItinUOps >= 0) + return ItinUOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2251,19 +2254,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - UOps = (NumRegs / 2); + int A8UOps = (NumRegs / 2); if (NumRegs % 2) - ++UOps; - return UOps; + ++A8UOps; + return A8UOps; } else if (Subtarget.isCortexA9()) { - UOps = (NumRegs / 2); + int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++UOps; - return UOps; + ++A9UOps; + return A9UOps; } else { // Assume the worst. return NumRegs; @@ -2763,11 +2766,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned NewUseIdx; const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, Reg, NewUseIdx, UseAdj); - if (NewUseMI) { - UseMI = NewUseMI; - UseIdx = NewUseIdx; - UseMCID = &UseMI->getDesc(); - } + if (!NewUseMI) + return -1; + + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); } if (Reg == ARM::CPSR) { @@ -2795,6 +2799,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } + if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) + return -1; + unsigned DefAlign = DefMI->hasOneMemOperand() ? (*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() @@ -3015,9 +3022,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, return 1; // If the second MI is predicated, then there is an implicit use dependency. 
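A note on the return-value convention in the getOperandLatency changes nearby: a negative result means the latency is unknown, and the caller supplies its own fallback, as the code being deleted from getOutputLatency did (sketch, mirroring that deleted code):

    int Lat = TII->getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
    unsigned Latency = (Lat <= 0) ? 1u : (unsigned)Lat; // assume one cycle if unknown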
- int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI, - DepMI->getNumOperands()); - return (Latency <= 0) ? 1 : Latency; + return getInstrLatency(ItinData, DefMI); } unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, @@ -3054,9 +3059,9 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, unsigned Class = MCID.getSchedClass(); // For instructions with variable uops, use uops as latency. - if (!ItinData->isEmpty() && !ItinData->Itineraries[Class].NumMicroOps) { + if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) return getNumMicroOps(ItinData, MI); - } + // For the common case, fall back on the itinerary's latency. unsigned Latency = ItinData->getStageLatency(Class); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 8217f239d1..1a10a4ab1c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -186,16 +186,20 @@ public: return NumCycles == 1; } - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - int &CmpMask, int &CmpValue) const; - - /// OptimizeCompareInstr - Convert the instruction to set the zero flag so - /// that we can remove a "comparison with zero". - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, - int CmpMask, int CmpValue, + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const; + + /// optimizeCompareInstr - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero"; Remove a redundant CMP + /// instruction if the flags can be updated in the same way by an earlier + /// instruction such as SUB. + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; /// FoldImmediate - 'Reg' is known to be defined by a move immediate diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c386a01e89..3650e1fb77 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -471,22 +471,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0); + MIB.addReg(D0, getUndefRegState(SrcIsUndef)); if (NumRegs > 1 && TableEntry->copyAllListRegs) - MIB.addReg(D1); + MIB.addReg(D1, getUndefRegState(SrcIsUndef)); if (NumRegs > 2 && TableEntry->copyAllListRegs) - MIB.addReg(D2); + MIB.addReg(D2, getUndefRegState(SrcIsUndef)); if (NumRegs > 3 && TableEntry->copyAllListRegs) - MIB.addReg(D3); + MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. 
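On the ExpandVST hunk above: a kill marker on a register that is also undef would be meaningless, hence the SrcIsKill && !SrcIsUndef guard just below. The helper used here is an existing one from MachineInstrBuilder.h; it returns RegState::Undef or 0, so it can be passed unconditionally:

    MIB.addReg(D0, getUndefRegState(SrcIsUndef)); // flags == 0 when not undef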
MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) // Add an implicit kill for the super-reg. + if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); // Transfer memoperands. diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2158b7e028..ff660210ea 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -177,7 +177,6 @@ class ARMFastISel : public FastISel { bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1361,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); - return true; + return true; } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -1740,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { // type and the target independent selector doesn't know how to handle it. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; - + unsigned Opc; switch (ISDOpcode) { default: return false; @@ -2146,7 +2145,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; // Can't handle non-double multi-reg retvals. - if (RetVT != MVT::isVoid && RetVT != MVT::i32) { + if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); @@ -2352,7 +2351,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, MIB.addReg(CalleeReg); else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); - else + else MIB.addExternalSymbol(IntrMemName, 0); } else { if (UseReg) @@ -2365,7 +2364,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Explicitly adding the predicate here. AddDefaultPred(MIB); } - + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2486,10 +2485,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return true; } } - + if (!MTI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) return false; @@ -2501,13 +2500,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // Don't handle volatile. if (MSI.isVolatile()) return false; - + if (!MSI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MSI.getDestAddressSpace() > 255) return false; - + return SelectCall(&I, "memset"); } case Intrinsic::trap: { @@ -2518,7 +2517,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { } bool ARMFastISel::SelectTrunc(const Instruction *I) { - // The high bits for a type smaller than the register size are assumed to be + // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); @@ -2709,7 +2708,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, // See if we can handle this address. 
Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - + unsigned ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e1c89e0c42..238b79e1f1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6561,11 +6561,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize; + unsigned ldrOpc, strOpc, UnitSize = 0; const TargetRegisterClass *TRC = isThumb2 ? (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC_Vec = 0; if (Align & 1) { ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; @@ -6576,10 +6577,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; - UnitSize = 4; + // Check whether we can use NEON instructions. + if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if ((Align % 16 == 0) && SizeVal >= 16) { + ldrOpc = ARM::VLD1q32wb_fixed; + strOpc = ARM::VST1q32wb_fixed; + UnitSize = 16; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; + } + else if ((Align % 8 == 0) && SizeVal >= 8) { + ldrOpc = ARM::VLD1d32wb_fixed; + strOpc = ARM::VST1d32wb_fixed; + UnitSize = 8; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; + } + } + // Can't use NEON instructions. + if (UnitSize == 0) { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } } + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6590,10 +6611,17 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(destIn).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ldrOpc), scratch) .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); @@ -6739,8 +6767,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(TRC); - if (isThumb2) { + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(destPhi).addImm(0).addReg(scratch)); + } else if (isThumb2) { AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); @@ -7113,9 +7147,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewMovDstReg = MRI.createVirtualRegister(isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? (const TargetRegisterClass*)&ARM::rGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); @@ -7132,12 +7163,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); - // insert a movs at the end of BB - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), - NewMovDstReg) - .addReg(ABSSrcReg, RegState::Kill) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::CPSR, RegState::Define); + // insert a cmp at the end of BB + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg).addImm(0)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -7149,7 +7178,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(NewMovDstReg, RegState::Kill) + .addReg(ABSSrcReg, RegState::Kill) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); // insert PHI in SinkBB, @@ -7157,7 +7186,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) - .addReg(NewMovDstReg).addMBB(BB); + .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI->eraseFromParent(); diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index b8f607eb4c..31b0c41f08 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) /// getNoopForMachoTarget - Return the noop instruction to use for a noop. void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { if (hasNOP()) { - NopInst.setOpcode(ARM::NOP); + NopInst.setOpcode(ARM::HINT); + NopInst.addOperand(MCOperand::CreateImm(0)); NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); NopInst.addOperand(MCOperand::CreateReg(0)); } else { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 611d9194fd..6a14871bb0 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -244,7 +244,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. 
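The EmitStructByval changes above select the widest copy unit the alignment and size permit: a 16-byte NEON q register, an 8-byte d register, or a 4-byte core load/store, with halfword/byte handling for what remains. The shape of the emitted code, modeled in plain C++ (a sketch of the structure only, not of the MI-level output):

    #include <cstring>

    static void byvalCopy(char *dst, const char *src,
                          unsigned SizeVal, unsigned UnitSize) {
      unsigned BytesLeft = SizeVal % UnitSize;
      unsigned LoopSize  = SizeVal - BytesLeft;
      for (unsigned i = 0; i != LoopSize; i += UnitSize)
        std::memcpy(dst + i, src + i, UnitSize); // one VLD1/VST1 or LDR/STR pair
      // The real expansion handles the tail with progressively smaller units.
      std::memcpy(dst + LoopSize, src + LoopSize, BytesLeft);
    }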
-def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; @@ -265,9 +266,9 @@ class RegConstraint<string C> { // ARM specific transformation functions and pattern fragments. // -// so_imm_neg_XFORM - Return a so_imm value packed into the format described for -// so_imm_neg def below. -def so_imm_neg_XFORM : SDNodeXForm<imm, [{ +// imm_neg_XFORM - Return an imm value packed into the format described for +// imm_neg defs below. +def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; @@ -286,7 +287,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ int64_t Value = -(int)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; - }], so_imm_neg_XFORM> { + }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -599,7 +600,10 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { } /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } +def Imm0_15AsmOperand: ImmAsmOperand { + let Name = "Imm0_15"; + let DiagnosticType = "ImmRange0_15"; +} def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; }]> { @@ -644,6 +648,11 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_65535AsmOperand; } +// imm0_65535_neg - An immediate whose negative value is in the range [0,65535]. +def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{ + return -Imm >= 0 && -Imm < 65536; +}]>; + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression.
// @@ -1640,33 +1649,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000000; +def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, + "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; } -def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000001; -} - -def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000010; -} - -def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000011; -} +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1679,18 +1673,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", let Inst{27-20} = 0b01101000; let Inst{7-4} = 0b1011; let Inst{11-8} = 0b1111; - let Unpredictable{11-8} = 0b1111; } -def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", - []>, Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000100; -} - -// The i32imm operand $val can be used by a debugger to store more information +// The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", []>, Requires<[IsARM]> { @@ -3243,6 +3229,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; +def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
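About the two ARMPat additions just above: an add of a negative constant whose magnitude fits in 16 bits is now selected as a movw of the magnitude into a scratch register followed by a register-register subtract. The predicate, restated in C++ for clarity (it mirrors the imm0_65535_neg ImmLeaf):

    static bool isImm0_65535Neg(int64_t Imm) {
      return -Imm >= 0 && -Imm < 65536;
    }
    // e.g. "add r0, r0, #-60000" becomes (scratch register chosen by
    // the allocator):
    //   movw r1, #60000
    //   sub  r0, r0, r1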
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 66daa1cb69..fec61d2390 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -32,9 +32,6 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{ let ParserMatchClass = ThumbSRImmAsmOperand; } -def imm_neg_XFORM : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); -}]>; def imm_comp_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 58119baea5..7ea96772aa 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32); }]>; +// so_imm_notSext_XFORM - Return a so_imm value packed into the format +// described for so_imm_notSext def below, with sign extension from 16 +// bits. +def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{ + APInt apIntN = N->getAPIntValue(); + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32); +}]>; + // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. @@ -86,6 +95,17 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ let ParserMatchClass = t2_so_imm_not_asmoperand; } +// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm +// if the upper 16 bits are zero. +def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{ + APInt apIntN = N->getAPIntValue(); + if (!apIntN.isIntN(16)) return false; + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1; + }], t2_so_imm_notSext16_XFORM> { + let ParserMatchClass = t2_so_imm_not_asmoperand; +} + // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ @@ -1911,11 +1931,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), + (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + let AddedComplexity = 1 in def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm), + (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
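The t2_so_imm_notSext16_XFORM computation above, restated in plain C++ (illustrative; the real code goes through APInt): sign-extend the low 16 bits, then complement. The result is the immediate actually encoded into the t2BICri produced by the new AND pattern further down.

    #include <cstdint>

    static uint32_t notSext16(uint32_t Imm) {
      int32_t SExt = (int32_t)(int16_t)(Imm & 0xFFFF); // trunc(16).sext(32)
      return ~(uint32_t)SExt;
    }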
@@ -1924,6 +1949,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), (t2SBCri rGPR:$src, imm0_255_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only @@ -2134,8 +2161,8 @@ defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) -def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), - (t2RORrr rGPR:$lhs, rGPR:$rhs)>; +def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, @@ -2332,6 +2359,17 @@ let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; +// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise +def top16Zero: PatLeaf<(i32 rGPR:$src), [{ + return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + }]>; + +// so_imm_notSext is needed instead of so_imm_not, as the value of imm +// will match the extended, not the original bitWidth for $src. +def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm), + (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>; + + // FIXME: Disable this pattern on Darwin to workaround an assembler bug. def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, @@ -3426,21 +3464,18 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -class T2I_hint<bits<8> op7_0, string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, []> { - let Inst{31-20} = 0xf3a; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - let Inst{10-8} = 0b000; - let Inst{7-0} = op7_0; -} - -def t2NOP : T2I_hint<0b00000000, "nop", ".w">; -def t2YIELD : T2I_hint<0b00000001, "yield", ".w">; -def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; -def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; -def t2SEV : T2I_hint<0b00000100, "sev", ".w">; +def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ + bits<8> imm; + let Inst{31-8} = 0b111100111010111110000000; + let Inst{7-0} = imm; +} + +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; +def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; +def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; +def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>; +def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>; def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { bits<4> opt; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index dccbffa4c9..4e2cda433b 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1207,6 +1207,14 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics + +// (fma x, y, (fneg z)) -> (vfnms z, x, y)) +def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : 
Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 45486fd0b6..81d2fa37c2 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass; def IIC_iLoad_d_i : InstrItinClass; def IIC_iLoad_d_r : InstrItinClass; def IIC_iLoad_d_ru : InstrItinClass; -def IIC_iLoad_m : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded -def IIC_iPop : InstrItinClass<0>; // micro-coded -def IIC_iPop_Br : InstrItinClass<0>; // micro-coded +def IIC_iLoad_m : InstrItinClass; +def IIC_iLoad_mu : InstrItinClass; +def IIC_iLoad_mBr : InstrItinClass; +def IIC_iPop : InstrItinClass; +def IIC_iPop_Br : InstrItinClass; def IIC_iLoadiALU : InstrItinClass; def IIC_iStore_i : InstrItinClass; def IIC_iStore_r : InstrItinClass; @@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass; def IIC_iStore_d_i : InstrItinClass; def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; -def IIC_iStore_m : InstrItinClass<0>; // micro-coded -def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_iStore_m : InstrItinClass; +def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; @@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded -def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded +def IIC_fpLoad_m : InstrItinClass; +def IIC_fpLoad_mu : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStore_m : InstrItinClass<0>; // micro-coded -def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded +def IIC_fpStore_m : InstrItinClass; +def IIC_fpStore_mu : InstrItinClass; def IIC_VLD1 : InstrItinClass; def IIC_VLD1x2 : InstrItinClass; def IIC_VLD1x3 : InstrItinClass; @@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], [], []>; - include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index eb1083ca23..61de00a208 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -155,28 +155,30 @@ def CortexA8Itineraries : MultiIssueItineraries< // Load multiple, def is the 5th operand. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 2, 1, 1, 3]>, + [1, 2, 1, 1, 3], [], -1>, // dynamic uops // // Pop, def is the 3rd operand. 
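The trailing -1 being added to these itinerary entries replaces the old InstrItinClass<0> convention for micro-coded instructions: NumMicroOps is now signed, -1 means "dynamic", and consumers only trust non-negative answers. The consuming idiom, as in the ARMBaseInstrInfo hunks earlier in this diff (sketch):

    int ItinUOps = ItinData->getNumMicroOps(Class);
    unsigned UOps = (ItinUOps >= 0) ? (unsigned)ItinUOps
                                    : TII->getNumMicroOps(ItinData, MI); // count from MI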
InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, - InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 3], [], -1>, // dynamic uops // // Push, def is the 3th operand. InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 1, 3]>, - + [1, 1, 3], [], -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -231,12 +233,13 @@ def CortexA8Itineraries : MultiIssueItineraries< // Store multiple. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>]>, + InstrStage<2, [A8_LSPipe]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_LSPipe]>], [2]>, - + InstrStage<2, [A8_LSPipe]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, @@ -397,14 +400,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 2], [], -1>, // dynamic uops // // FP Load Multiple + update InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 2], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, @@ -423,15 +428,16 @@ def CortexA8Itineraries : MultiIssueItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, - + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 1], [], -1>, // dynamic uops // NEON // Issue through integer pipeline, and execute in NEON unit. // diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index a00577bf3d..1677ba6a98 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -284,7 +284,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. 
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -292,7 +293,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple plus branch InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -301,7 +303,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -309,7 +312,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop + branch, def is the 3rd operand. InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -318,8 +322,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, - + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -413,14 +417,15 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>]>, + InstrStage<2, [A9_LSUnit]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], [2]>, - + InstrStage<2, [A9_LSUnit]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>, @@ -717,7 +722,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Load Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -726,7 +732,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -753,7 +760,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. 
@@ -762,7 +770,8 @@ def CortexA9Itineraries : MultiIssueItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // NEON // VLD1 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 99ed63293d..b12607b206 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -143,22 +143,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - PM->add(createGlobalMergePass(TM->getTargetLowering())); + addPass(createGlobalMergePass(TM->getTargetLowering())); return false; } bool ARMPassConfig::addInstSelector() { - PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel())); + addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) - PM->add(createARMLoadStoreOptimizationPass(true)); + addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) - PM->add(createMLxExpansionPass()); + addPass(createMLxExpansionPass()); return true; } @@ -166,23 +166,23 @@ bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) { - PM->add(createARMLoadStoreOptimizationPass()); + addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } if (getARMSubtarget().hasNEON()) - PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. - PM->add(createARMExpandPseudoPass()); + addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) - addPass(IfConverterID); + addPass(&IfConverterID); } if (getARMSubtarget().isThumb2()) - PM->add(createThumb2ITBlockPass()); + addPass(createThumb2ITBlockPass()); return true; } @@ -190,10 +190,10 @@ bool ARMPassConfig::addPreSched2() { bool ARMPassConfig::addPreEmitPass() { if (getARMSubtarget().isThumb2()) { if (!getARMSubtarget().prefers32BitThumb()) - PM->add(createThumb2SizeReductionPass()); + addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - addPass(UnpackMachineBundlesID); + addPass(&UnpackMachineBundlesID); } // @LOCALMOD-START @@ -205,12 +205,12 @@ bool ARMPassConfig::addPreEmitPass() { } // @LOCALMOD-END - PM->add(createARMConstantIslandPass()); + addPass(createARMConstantIslandPass()); // @LOCALMOD-START // This pass does all the heavy sfi lifting. 
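On the ARMTargetMachine hunk above: the pass-construction hooks now go through TargetPassConfig::addPass rather than poking PM->add directly. addPass accepts either a newly created pass instance, which the config takes ownership of, or the address of a statically registered pass ID, which is why IfConverterID and UnpackMachineBundlesID gained a leading &. Condensed from the code above (sketch, not the full hook):

    bool ARMPassConfig::addPreSched2() {
      addPass(createARMExpandPseudoPass()); // by instance
      addPass(&IfConverterID);              // by registered pass ID
      return true;
    }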
if (getARMSubtarget().isTargetNaCl()) { - PM->add(createARMNaClRewritePass()); + addPass(createARMNaClRewritePass()); } // @LOCALMOD-END diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 00c495b89a..22db332f2b 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -24,20 +24,11 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + bool isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); - isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); + InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); //LSDASection = NULL; } @@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } - -const MCSection * -ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); - - if (Priority == 65535) - return StaticCtorSection; - - // Emit ctors in priority order. - std::string Name = std::string(".init_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} - -const MCSection * -ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); - - if (Priority == 65535) - return StaticDtorSection; - - // Emit dtors in priority order. 
- std::string Name = std::string(".fini_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index ff21060414..c6a7261439 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,7 +20,6 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; - bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -32,9 +31,6 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } - - const MCSection * getStaticCtorSection(unsigned Priority) const; - const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2fae489371..68f128189f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -236,7 +236,10 @@ public: Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, Match_RequiresNotITBlock, Match_RequiresV6, - Match_RequiresThumb2 + Match_RequiresThumb2, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "ARMGenAsmMatcher.inc" + }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) @@ -3253,10 +3256,11 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef OptStr = Tok.getString(); - unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size())) + unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower()) .Case("sy", ARM_MB::SY) .Case("st", ARM_MB::ST) .Case("sh", ARM_MB::ISH) @@ -3284,7 +3288,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef IFlagsStr = Tok.getString(); // An iflags string of "none" is interpreted to mean that none of the AIF @@ -3353,22 +3358,22 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { .Case("xpsr_nzcvq", 0x803) .Case("xpsr_g", 0x403) .Case("xpsr_nzcvqg", 0xc03) - .Case("ipsr", 5) - .Case("epsr", 6) - .Case("iepsr", 7) - .Case("msp", 8) - .Case("psp", 9) - .Case("primask", 16) - .Case("basepri", 17) - .Case("basepri_max", 18) - .Case("faultmask", 19) - .Case("control", 20) + .Case("ipsr", 0x805) + .Case("epsr", 0x806) + .Case("iepsr", 0x807) + .Case("msp", 0x808) + .Case("psp", 0x809) + .Case("primask", 0x810) + .Case("basepri", 0x811) + .Case("basepri_max", 0x812) + .Case("faultmask", 0x813) + .Case("control", 0x814) .Default(~0U); if (FlagsVal == ~0U) return MatchOperand_NoMatch; - if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. 
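In the parseMSRMaskOperand change above, the M-class special registers move from bare SYSm numbers to 0x800-based values. The apparent layout, inferred from the values rather than stated in the hunk: the low byte is the SYSm encoding and bit 11 marks "M-class special register", keeping these values disjoint from the mask-bit encodings of the apsr/iapsr family:

    // Hypothetical helper reflecting the observed pattern:
    static unsigned encodeMClassSysReg(unsigned SYSm) { return 0x800 | SYSm; }
    // basepri: encodeMClassSysReg(0x11) == 0x811; control: 0x814; ipsr: 0x805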
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2fae489371..68f128189f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -236,7 +236,10 @@ public:
     Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
     Match_RequiresNotITBlock,
     Match_RequiresV6,
-    Match_RequiresThumb2
+    Match_RequiresThumb2,
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "ARMGenAsmMatcher.inc"
+
   };
 
   ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
@@ -3253,10 +3256,11 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
 parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
-  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  if (!Tok.is(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
   StringRef OptStr = Tok.getString();
 
-  unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
+  unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
     .Case("sy",    ARM_MB::SY)
     .Case("st",    ARM_MB::ST)
     .Case("sh",    ARM_MB::ISH)
@@ -3284,7 +3288,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
 parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
-  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  if (!Tok.is(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
   StringRef IFlagsStr = Tok.getString();
 
   // An iflags string of "none" is interpreted to mean that none of the AIF
@@ -3353,22 +3358,22 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
       .Case("xpsr_nzcvq", 0x803)
       .Case("xpsr_g", 0x403)
       .Case("xpsr_nzcvqg", 0xc03)
-      .Case("ipsr", 5)
-      .Case("epsr", 6)
-      .Case("iepsr", 7)
-      .Case("msp", 8)
-      .Case("psp", 9)
-      .Case("primask", 16)
-      .Case("basepri", 17)
-      .Case("basepri_max", 18)
-      .Case("faultmask", 19)
-      .Case("control", 20)
+      .Case("ipsr", 0x805)
+      .Case("epsr", 0x806)
+      .Case("iepsr", 0x807)
+      .Case("msp", 0x808)
+      .Case("psp", 0x809)
+      .Case("primask", 0x810)
+      .Case("basepri", 0x811)
+      .Case("basepri_max", 0x812)
+      .Case("faultmask", 0x813)
+      .Case("control", 0x814)
       .Default(~0U);
 
     if (FlagsVal == ~0U)
       return MatchOperand_NoMatch;
 
-    if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
+    if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813)
       // basepri, basepri_max and faultmask only valid for V7m.
      return MatchOperand_NoMatch;
@@ -7410,6 +7415,11 @@ MatchAndEmitInstruction(SMLoc IDLoc,
     return Error(IDLoc, "instruction variant requires ARMv6 or later");
   case Match_RequiresThumb2:
     return Error(IDLoc, "instruction variant requires Thumb2");
+  case Match_ImmRange0_15: {
+    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+    if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+  }
   }
 
   llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 92c5d92ff7..bf74a9df3b 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -51,6 +51,8 @@ add_llvm_target(ARMCodeGen
   Thumb2SizeReduction.cpp
   )
 
+add_dependencies(LLVMARMCodeGen intrinsics_gen)
+
 # workaround for hanging compilation on MSVC9, 10
 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
   set_property(
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 9eda04d776..e97f4c7430 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                StringRef Annot) {
   unsigned Opcode = MI->getOpcode();
 
+  // Check for HINT instructions w/ canonical names.
+  if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+    switch (MI->getOperand(0).getImm()) {
+    case 0: O << "\tnop"; break;
+    case 1: O << "\tyield"; break;
+    case 2: O << "\twfe"; break;
+    case 3: O << "\twfi"; break;
+    case 4: O << "\tsev"; break;
+    default:
+      // Anything else should just print normally.
+      printInstruction(MI, O);
+      printAnnotation(O, Annot);
+      return;
+    }
+    printPredicateOperand(MI, 1, O);
+    if (Opcode == ARM::t2HINT)
+      O << ".w";
+    printAnnotation(O, Annot);
+    return;
+  }
+
   // Check for MOVs and print canonical forms, instead.
   if (Opcode == ARM::MOVsr) {
     // FIXME: Thumb variants?
@@ -736,16 +757,26 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
     case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr
     case 0x403: O << "xpsr_g"; return;
     case 0xc03: O << "xpsr_nzcvqg"; return;
-    case 5: O << "ipsr"; return;
-    case 6: O << "epsr"; return;
-    case 7: O << "iepsr"; return;
-    case 8: O << "msp"; return;
-    case 9: O << "psp"; return;
-    case 16: O << "primask"; return;
-    case 17: O << "basepri"; return;
-    case 18: O << "basepri_max"; return;
-    case 19: O << "faultmask"; return;
-    case 20: O << "control"; return;
+    case 5:
+    case 0x805: O << "ipsr"; return;
+    case 6:
+    case 0x806: O << "epsr"; return;
+    case 7:
+    case 0x807: O << "iepsr"; return;
+    case 8:
+    case 0x808: O << "msp"; return;
+    case 9:
+    case 0x809: O << "psp"; return;
+    case 0x10:
+    case 0x810: O << "primask"; return;
+    case 0x11:
+    case 0x811: O << "basepri"; return;
+    case 0x12:
+    case 0x812: O << "basepri_max"; return;
+    case 0x13:
+    case 0x813: O << "faultmask"; return;
+    case 0x14:
+    case 0x814: O << "control"; return;
     }
   }
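The psr-register renumbering above (5 becoming 0x805 and so on) keeps the parser and the printer agreeing on one packed value per register name. A hedged sketch of the encoding as I read it from the two tables above, not from an ARM reference:

    // Inferred layout of FlagsVal (an assumption based on the values above):
    // the low byte is the M-class SYSm register number, and bits 11:8 hold
    // the APSR write-mask nibble (8 = nzcvq, 4 = g, 0xc = both); plain
    // system registers such as ipsr reuse the 8 bit, giving 0x805 etc.
    static unsigned getSYSm(unsigned FlagsVal)       { return FlagsVal & 0xFF; }
    static unsigned getMaskNibble(unsigned FlagsVal) { return (FlagsVal >> 8) & 0xF; }

The printer's paired case labels (case 5: case 0x805:) keep values produced under the old small-integer numbering printing correctly.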
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 8dee1b1d6a..4d922d9b44 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -179,9 +179,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
       break;
     }
     break;
-  case ARM::fixup_arm_uncondbl:
   case ARM::fixup_arm_blx:
-  case ARM::fixup_arm_uncondbranch:
+  case ARM::fixup_arm_uncondbl:
     switch (Modifier) {
     case MCSymbolRefExpr::VK_ARM_PLT:
       Type = ELF::R_ARM_PLT32;
@@ -193,6 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
     break;
   case ARM::fixup_arm_condbl:
   case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
     Type = ELF::R_ARM_JUMP24;
     break;
   case ARM::fixup_arm_movt_hi16:
@@ -253,10 +253,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
   case ARM::fixup_arm_thumb_cp:
   case ARM::fixup_arm_thumb_br:
     llvm_unreachable("Unimplemented");
-  case ARM::fixup_arm_uncondbranch:
-    Type = ELF::R_ARM_CALL;
-    break;
   case ARM::fixup_arm_condbranch:
+  case ARM::fixup_arm_uncondbranch:
    Type = ELF::R_ARM_JUMP24;
     break;
   case ARM::fixup_arm_movt_hi16:
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index cf4f796ec2..1f8ca8681c 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -24,5 +24,7 @@ add_llvm_target(CellSPUCodeGen
   SPUNopFiller.cpp
   )
 
+add_dependencies(LLVMCellSPUCodeGen intrinsics_gen)
+
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 14021fef05..03d5a9ae0c 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -301,7 +301,9 @@ bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
-    default: return true; // Unknown modifier.
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
     case 'L': // Write second word of DImode reference.
       // Verify that this operand has two consecutive registers.
       if (!MI->getOperand(OpNo).isReg() ||
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3b90261fe6..54764f133c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -72,7 +72,7 @@ TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
 
 bool SPUPassConfig::addInstSelector() {
   // Install an instruction selector.
-  PM->add(createSPUISelDag(getSPUTargetMachine()));
+  addPass(createSPUISelDag(getSPUTargetMachine()));
   return false;
 }
 
@@ -85,9 +85,9 @@ bool SPUPassConfig::addPreEmitPass() {
     (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
       "createTCESchedulerPass");
   if (schedulerCreator != NULL)
-    PM->add(schedulerCreator("cellspu"));
+    addPass(schedulerCreator("cellspu"));
 
   //align instructions with nops/lnops for dual issue
-  PM->add(createSPUNopFillerPass(getSPUTargetMachine()));
+  addPass(createSPUNopFillerPass(getSPUTargetMachine()));
   return true;
 }
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index cd2ebcb508..c8e757becc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -130,6 +130,7 @@ namespace {
   private:
     void printLinkageType(GlobalValue::LinkageTypes LT);
     void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
+    void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM);
     void printCallingConv(CallingConv::ID cc);
     void printEscapedString(const std::string& str);
     void printCFP(const ConstantFP* CFP);
@@ -325,6 +326,26 @@ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
   }
 }
 
+void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) {
+  switch (TLM) {
+  case GlobalVariable::NotThreadLocal:
+    Out << "GlobalVariable::NotThreadLocal";
+    break;
+  case GlobalVariable::GeneralDynamicTLSModel:
+    Out << "GlobalVariable::GeneralDynamicTLSModel";
+    break;
+  case GlobalVariable::LocalDynamicTLSModel:
+    Out << "GlobalVariable::LocalDynamicTLSModel";
+    break;
+  case GlobalVariable::InitialExecTLSModel:
+    Out << "GlobalVariable::InitialExecTLSModel";
+    break;
+  case GlobalVariable::LocalExecTLSModel:
+    Out << "GlobalVariable::LocalExecTLSModel";
+    break;
+  }
+}
+
 // printEscapedString - Print each character of the specified string, escaping
 // it if it is not printable or if it is an escape char.
 void CppWriter::printEscapedString(const std::string &Str) {
@@ -996,7 +1017,9 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) {
   }
   if (GV->isThreadLocal()) {
     printCppName(GV);
-    Out << "->setThreadLocal(true);";
+    Out << "->setThreadLocalMode(";
+    printThreadLocalMode(GV->getThreadLocalMode());
+    Out << ");";
     nl(Out);
   }
   if (is_inline) {
@@ -2078,7 +2101,9 @@ char CppWriter::ID = 0;
 bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                            formatted_raw_ostream &o,
                                            CodeGenFileType FileType,
-                                           bool DisableVerify) {
+                                           bool DisableVerify,
+                                           AnalysisID StartAfter,
+                                           AnalysisID StopAfter) {
   if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
   PM.add(new CppWriter(o));
   return false;
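Since the CppBackend emits C++ that reconstructs a module, the change above means the generated code now pins the precise TLS model. Roughly what the output looks like for a thread-local global after this patch (a sketch in the backend's own output style; variable names are invented for the example):

    // Generated-code sketch; previously the last line was setThreadLocal(true).
    GlobalVariable* gvar_int32_counter = new GlobalVariable(/*Module=*/*mod,
        /*Type=*/IntegerType::get(mod->getContext(), 32),
        /*isConstant=*/false,
        /*Linkage=*/GlobalValue::ExternalLinkage,
        /*Initializer=*/0,
        /*Name=*/"counter");
    gvar_int32_counter->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel);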
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 92bca6c3c7..9cbe7981a9 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -31,7 +31,9 @@ struct CPPTargetMachine : public TargetMachine {
   virtual bool addPassesToEmitFile(PassManagerBase &PM,
                                    formatted_raw_ostream &Out,
                                    CodeGenFileType FileType,
-                                   bool DisableVerify);
+                                   bool DisableVerify,
+                                   AnalysisID StartAfter,
+                                   AnalysisID StopAfter);
 
   virtual const TargetData *getTargetData() const { return 0; }
 };
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 3db17484b0..1f2d8accbb 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -32,6 +32,8 @@ add_llvm_target(HexagonCodeGen
   HexagonNewValueJump.cpp
   )
 
+add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
+
 add_subdirectory(TargetInfo)
 add_subdirectory(InstPrinter)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 0dc243f2b8..5fa4740f2a 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -133,7 +133,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
-    default: return true; // Unknown modifier.
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
     case 'c': // Don't print "$" before a global var name or constant.
       // Hexagon never has a prefix.
       printOperand(MI, OpNo, OS);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 7de27f74e2..a7b291ff2a 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -102,47 +102,47 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 bool HexagonPassConfig::addInstSelector() {
-  PM->add(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
-  PM->add(createHexagonISelDag(getHexagonTargetMachine()));
-  PM->add(createHexagonPeephole());
+  addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+  addPass(createHexagonISelDag(getHexagonTargetMachine()));
+  addPass(createHexagonPeephole());
   return false;
 }
 
 
 bool HexagonPassConfig::addPreRegAlloc() {
   if (!DisableHardwareLoops) {
-    PM->add(createHexagonHardwareLoops());
+    addPass(createHexagonHardwareLoops());
   }
 
   return false;
 }
 
 bool HexagonPassConfig::addPostRegAlloc() {
-  PM->add(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+  addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
   return true;
 }
 
 
 bool HexagonPassConfig::addPreSched2() {
-  addPass(IfConverterID);
+  addPass(&IfConverterID);
   return true;
 }
 
 bool HexagonPassConfig::addPreEmitPass() {
 
   if (!DisableHardwareLoops) {
-    PM->add(createHexagonFixupHwLoops());
+    addPass(createHexagonFixupHwLoops());
   }
 
-  PM->add(createHexagonNewValueJump());
+  addPass(createHexagonNewValueJump());
 
   // Expand Spill code for predicate registers.
-  PM->add(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+  addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
 
   // Split up TFRcondsets into conditional transfers.
-  PM->add(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+  addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
 
   // Create Packets.
-  PM->add(createHexagonPacketizer());
+  addPass(createHexagonPacketizer());
 
   return false;
 }
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index bf1deef491..6c3e8b6447 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -30,6 +30,8 @@ add_llvm_target(MBlazeCodeGen
   MBlazeELFWriterInfo.cpp
   )
 
+add_dependencies(LLVMMBlazeCodeGen intrinsics_gen)
+
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index b4edff0709..c2888553c5 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -50,7 +50,7 @@ def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
 // MBlaze processors supported.
 //===----------------------------------------------------------------------===//
 
-def : Processor<"mblaze",  MBlazeGenericItineraries, []>;
+def : Processor<"mblaze",  NoItineraries, []>;
 def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
 def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 7269697ac2..e9f340f2f6 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -200,7 +200,14 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                 unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+    }
+  }
 
   printOperand(MI, OpNo, O);
   return false;
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4a3ae5fc14..cd5691ce64 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -40,11 +40,6 @@ def IIC_WDC    : InstrItinClass;
 def IIC_Pseudo : InstrItinClass;
 
 //===----------------------------------------------------------------------===//
-// MBlaze generic instruction itineraries.
-//===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
-
-//===----------------------------------------------------------------------===//
 // MBlaze instruction itineraries for three stage pipeline.
 //===----------------------------------------------------------------------===//
 include "MBlazeSchedule3.td"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 62393d0920..5f82f14203 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -68,7 +68,7 @@ TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
 // Install an instruction selector pass using
 // the ISelDag to gen MBlaze code.
 bool MBlazePassConfig::addInstSelector() {
-  PM->add(createMBlazeISelDag(getMBlazeTargetMachine()));
+  addPass(createMBlazeISelDag(getMBlazeTargetMachine()));
   return false;
 }
 
@@ -76,6 +76,6 @@ bool MBlazePassConfig::addInstSelector() {
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 bool MBlazePassConfig::addPreEmitPass() {
-  PM->add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
+  addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
   return true;
 }
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a8f9b52746..f9ecaed83a 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -23,6 +23,8 @@ add_llvm_target(MSP430CodeGen
   MSP430MCInstLower.cpp
   )
 
+add_dependencies(LLVMMSP430CodeGen intrinsics_gen)
+
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 3acf96bb7d..817001d6ad 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -60,12 +60,12 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
 
 bool MSP430PassConfig::addInstSelector() {
   // Install an instruction selector.
-  PM->add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
+  addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
   return false;
 }
 
 bool MSP430PassConfig::addPreEmitPass() {
   // Must run branch selection immediately preceding the asm printer.
-  PM->add(createMSP430BranchSelectionPass());
+  addPass(createMSP430BranchSelectionPass());
   return false;
 }
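The PM->add to addPass conversion repeated across every target above is mechanical, but the direction matters: TargetPassConfig::addPass registers the pass with the config layer, which can then substitute or disable passes, instead of pushing it straight into the PassManager. The recurring shape of the change, sketched for a hypothetical backend (the MyTarget names are placeholders, not real LLVM symbols):

    bool MyTargetPassConfig::addInstSelector() {
      // Before: PM->add(createMyTargetISelDag(getMyTargetMachine()));
      addPass(createMyTargetISelDag(getMyTargetMachine()));
      return false;
    }

    bool MyTargetPassConfig::addPreSched2() {
      // Standard codegen passes are now named by the address of their ID,
      // as in the Hexagon hunk above.
      addPass(&IfConverterID);
      return true;
    }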
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index ac21c259fb..6c7343bbe5 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,6 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
-                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
 add_llvm_library(LLVMMipsAsmParser
   MipsAsmParser.cpp
   )
+
+add_dependencies(LLVMMipsAsmParser MipsCommonTableGen)
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index bccb5099ef..e9a228c331 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(MipsCodeGen
   MipsISelDAGToDAG.cpp
   MipsISelLowering.cpp
   MipsFrameLowering.cpp
+  MipsLongBranch.cpp
   MipsMCInstLower.cpp
   MipsMachineFunction.cpp
   MipsRegisterInfo.cpp
@@ -31,6 +32,8 @@ add_llvm_target(MipsCodeGen
   MipsSelectionDAGInfo.cpp
   )
 
+add_dependencies(LLVMMipsCodeGen intrinsics_gen)
+
 add_subdirectory(InstPrinter)
 add_subdirectory(Disassembler)
 add_subdirectory(TargetInfo)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index b8fe772544..9c5d31e21c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -37,6 +37,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case FK_GPRel_4:
   case FK_Data_4:
   case Mips::fixup_Mips_LO16:
+  case Mips::fixup_Mips_GPOFF_HI:
+  case Mips::fixup_Mips_GPOFF_LO:
+  case Mips::fixup_Mips_GOT_PAGE:
+  case Mips::fixup_Mips_GOT_OFST:
     break;
   case Mips::fixup_Mips_PC16:
     // So far we are only using this type for branches.
@@ -75,10 +79,8 @@ public:
     :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
 
   MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    // @LOCALMOD-BEGIN-UPSTREAM
     return createMipsELFObjectWriter(OS,
       MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
-    // @LOCALMOD-END-UPSTREAM
   }
 
   /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
@@ -119,7 +121,8 @@ public:
         CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
     }
 
-    uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+    uint64_t Mask = ((uint64_t)(-1) >>
+                     (64 - getFixupKindInfo(Kind).TargetSize));
     CurVal |= Value & Mask;
 
     // Write out the fixed up bytes back to the code/data bits.
@@ -160,7 +163,11 @@ public:
       { "fixup_Mips_TLSLDM",       0,     16,   0 },
       { "fixup_Mips_DTPREL_HI",    0,     16,   0 },
       { "fixup_Mips_DTPREL_LO",    0,     16,   0 },
-      { "fixup_Mips_Branch_PCRel", 0,     16,  MCFixupKindInfo::FKF_IsPCRel }
+      { "fixup_Mips_Branch_PCRel", 0,     16,  MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_Mips_GPOFF_HI",     0,     16,   0 },
+      { "fixup_Mips_GPOFF_LO",     0,     16,   0 },
+      { "fixup_Mips_GOT_PAGE",     0,     16,   0 },
+      { "fixup_Mips_GOT_OFST",     0,     16,   0 }
     };
 
     if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 2091bec500..9f9272886e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -34,7 +34,7 @@ namespace {
 
   class MipsELFObjectWriter : public MCELFObjectTargetWriter {
   public:
-    MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI);
+    MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64);
 
     virtual ~MipsELFObjectWriter();
 
@@ -52,9 +52,11 @@ namespace {
   };
 }
 
-MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI)
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
+                                         bool _isN64)
   : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
-                            /*HasRelocationAddend*/ false) {}
+                            /*HasRelocationAddend*/ false,
+                            /*IsN64*/ _isN64) {}
 
 MipsELFObjectWriter::~MipsELFObjectWriter() {}
 
@@ -148,8 +150,23 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
   case Mips::fixup_Mips_PC16:
     Type = ELF::R_MIPS_PC16;
     break;
+  case Mips::fixup_Mips_GOT_PAGE:
+    Type = ELF::R_MIPS_GOT_PAGE;
+    break;
+  case Mips::fixup_Mips_GOT_OFST:
+    Type = ELF::R_MIPS_GOT_OFST;
+    break;
+  case Mips::fixup_Mips_GPOFF_HI:
+    Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+    Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+    Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type);
+    break;
+  case Mips::fixup_Mips_GPOFF_LO:
+    Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type);
+    Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type);
+    Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type);
+    break;
   }
-
   return Type;
 }
 
@@ -184,10 +201,10 @@ static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
 
 void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
                                      std::vector<ELFRelocationEntry> &Relocs) {
-  // Call the defualt function first. Relocations are sorted in descending
+  // Call the default function first. Relocations are sorted in descending
   // order of r_offset.
   MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
-
+
   RelLs RelocLs;
   std::vector<RelLsIter> Unmatched;
 
@@ -244,6 +261,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
                                                 uint8_t OSABI,
                                                 bool IsLittleEndian,
                                                 bool Is64Bit) {
-  MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI);
+  MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
+                                                (Is64Bit) ? true : false);
   return createELFObjectWriter(MOTW, OS, IsLittleEndian);
 }
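fixup_Mips_GPOFF_HI/LO above compose three relocation types into one record; that works because N64 ELF widens r_info so a single relocation can name a chain the linker applies in order, which is what the setRType helpers build. A sketch of the packing, assuming the standard ELF64 N64 field layout (in-memory byte order aside):

    #include <cstdint>

    // r_sym: symbol index; Type/Type2/Type3: relocation pipeline, applied
    // first-to-last; SpecialSym is the N64 "ssym" field, zero here.
    static uint64_t packN64RInfo(uint32_t Sym, uint8_t SpecialSym,
                                 uint8_t Type, uint8_t Type2, uint8_t Type3) {
      return ((uint64_t)Sym << 32) | ((uint64_t)SpecialSym << 24) |
             ((uint64_t)Type3 << 16) | ((uint64_t)Type2 << 8) | Type;
    }

    // fixup_Mips_GPOFF_HI then amounts to:
    //   packN64RInfo(Sym, 0, R_MIPS_GPREL16, R_MIPS_SUB, R_MIPS_HI16)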
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 9b76eda861..1f6000cc8c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -95,6 +95,18 @@ namespace Mips {
     // PC relative branch fixup resulting in - R_MIPS_PC16
     fixup_Mips_Branch_PCRel,
 
+    // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+    fixup_Mips_GPOFF_HI,
+
+    // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+    fixup_Mips_GPOFF_LO,
+
+    // resulting in - R_MIPS_GOT_PAGE
+    fixup_Mips_GOT_PAGE,
+
+    // resulting in - R_MIPS_GOT_OFST
+    fixup_Mips_GOT_OFST,
+
     // Marker
     LastTargetFixupKind,
     NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 3b0e59b87a..8ab2edeca0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -187,7 +187,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
   } else if (MO.isFPImm()) {
     return static_cast<unsigned>(APFloat(MO.getFPImm())
         .bitcastToAPInt().getHiBits(32).getLimitedValue());
-  }
+  }
 
   // MO must be an Expr.
   assert(MO.isExpr());
@@ -201,10 +201,27 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
   }
 
   assert (Kind == MCExpr::SymbolRef);
-
+
   Mips::Fixups FixupKind = Mips::Fixups(0);
 
   switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+  default: llvm_unreachable("Unknown fixup kind!");
+    break;
+  case MCSymbolRefExpr::VK_Mips_GOT_DISP :
+    llvm_unreachable("fixup kind VK_Mips_GOT_DISP not supported for direct object!");
+    break;
+  case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
+    FixupKind = Mips::fixup_Mips_GPOFF_HI;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GPOFF_LO :
+    FixupKind = Mips::fixup_Mips_GPOFF_LO;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GOT_PAGE :
+    FixupKind = Mips::fixup_Mips_GOT_PAGE;
+    break;
+  case MCSymbolRefExpr::VK_Mips_GOT_OFST :
+    FixupKind = Mips::fixup_Mips_GOT_OFST;
+    break;
   case MCSymbolRefExpr::VK_Mips_GPREL:
     FixupKind = Mips::fixup_Mips_GPREL16;
     break;
@@ -244,8 +261,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
   case MCSymbolRefExpr::VK_Mips_TPREL_LO:
     FixupKind = Mips::fixup_Mips_TPREL_LO;
     break;
-  default:
-    break;
   } // switch
 
   Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index ed61b642fc..411030aaa1 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -34,6 +34,7 @@ namespace llvm {
 
   FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+  FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
                                              JITCodeEmitter &JCE);
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index fc530939ed..2e0239377d 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -11,19 +11,29 @@
 //
 //===----------------------------------------------------------------------===//
 
+class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
+  let Predicates = [InMips16Mode];
+}
+
+def LI16E : FEXT_RI16<0b01101, (outs CPU16Regs:$rx),
+                      (ins uimm16:$amt),
+                      !strconcat("li", "\t$rx, $amt"),
+                      [(set CPU16Regs:$rx, immZExt16:$amt )], IILoad>;
+
 let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1,
     isBarrier=1, hasCtrlDep=1, rx=0, nd=0, l=0, ra=0 in
-def RET16 : FRR16_JALRC < (outs), (ins CPURAReg:$target),
-            "jr\t$target", [(MipsRet CPURAReg:$target)], IIBranch>;
+def RET16 : FRR16_JALRC
+  < (outs), (ins CPURAReg:$target),
+    "jr\t$target", [(MipsRet CPURAReg:$target)], IIBranch>;
 
 // As stack alignment is always done with addiu, we need a 16-bit immediate
 let Defs = [SP], Uses = [SP] in {
 def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt),
-                                      "!ADJCALLSTACKDOWN $amt",
-                                      [(callseq_start timm:$amt)]>;
+                                       "!ADJCALLSTACKDOWN $amt",
+                                       [(callseq_start timm:$amt)]>;
 def ADJCALLSTACKUP16   : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
-                                      "!ADJCALLSTACKUP $amt1",
-                                      [(callseq_end timm:$amt1, timm:$amt2)]>;
+                                       "!ADJCALLSTACKUP $amt1",
+                                       [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
 
@@ -31,4 +41,7 @@ def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2),
 // Jump and Link (Call)
 let isCall=1, hasDelaySlot=1, nd=0, l=0, ra=0 in
 def JumpLinkReg16:
   FRR16_JALRC<(outs), (ins CPU16Regs:$rs, variable_ops),
-              "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+               "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+
+// Small immediates
+def : Mips16Pat<(i32 immZExt16:$in), (LI16E immZExt16:$in)>;
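In the Mips64InstrInfo.td hunk below, the unaligned extended-load patterns zero-extend through a DSLL/DSRL pair because ULW sign-extends its 32-bit result into the 64-bit register and MIPS64 has no unaligned zero-extending load. The identity that (DSRL (DSLL x, 32), 32) relies on, in plain C as a sanity check:

    #include <cstdint>

    // Shift left then logically right by 32: keeps the low 32 bits and
    // zeroes the copied sign bits (unsigned >> zero-fills).
    static uint64_t zeroExtendLow32(uint64_t SignExtended) {
      return (SignExtended << 32) >> 32;
    }
    // zeroExtendLow32(0xFFFFFFFF80000000ULL) == 0x0000000080000000ULL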
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 7e129b8b8d..a5a3038827 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -230,47 +230,49 @@ def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
 
 // extended loads
 let Predicates = [NotN64, HasStandardEncoding] in {
-  def : Pat<(i64 (extloadi1  addr:$src)), (LB64 addr:$src)>;
-  def : Pat<(i64 (extloadi8  addr:$src)), (LB64 addr:$src)>;
-  def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
-  def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
-  def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
-  def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
-  def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+  def : MipsPat<(i64 (extloadi1  addr:$src)), (LB64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi8  addr:$src)), (LB64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
+  def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
 }
 let Predicates = [IsN64, HasStandardEncoding] in {
-  def : Pat<(i64 (extloadi1  addr:$src)), (LB64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi8  addr:$src)), (LB64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
-  def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+  def : MipsPat<(i64 (extloadi1  addr:$src)), (LB64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi8  addr:$src)), (LB64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+  def : MipsPat<(zextloadi32_u addr:$a),
+                (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
 }
 
 // hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
-          (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
-          (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
-          (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
-          (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
-          (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+              (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+              (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+              (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+              (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+              (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+              (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
 
 def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
 def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
@@ -290,21 +292,22 @@ defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
 defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
 
 // select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
-      Requires<[IsN64, HasStandardEncoding]>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
+      Requires<[IsN64, HasStandardEncoding]>;
 
 // truncate
-def : Pat<(i32 (trunc CPU64Regs:$src)),
-          (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
-      Requires<[IsN64, HasStandardEncoding]>;
+def : MipsPat<(i32 (trunc CPU64Regs:$src)),
+              (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
+      Requires<[IsN64, HasStandardEncoding]>;
 
 // 32-to-64-bit extension
-def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
-def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
-def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
 
 // Sign extend in register
-def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
+              (SLL64_64 CPU64Regs:$src)>;
 
 // bswap pattern
-def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
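On the bswap pattern just above: DSBH swaps the two bytes inside each halfword, and DSHD then reverses the four halfwords, so composed they reverse all eight bytes. A C model of the two instructions (my paraphrase of the MIPS64 semantics, not the backend's code):

    #include <cstdint>

    static uint64_t dsbh(uint64_t V) {   // swap bytes within halfwords
      return ((V & 0x00FF00FF00FF00FFULL) << 8) |
             ((V >> 8) & 0x00FF00FF00FF00FFULL);
    }

    static uint64_t dshd(uint64_t V) {   // reverse halfword order
      V = ((V & 0x0000FFFF0000FFFFULL) << 16) |
          ((V >> 16) & 0x0000FFFF0000FFFFULL);
      return (V << 32) | (V >> 32);
    }
    // dshd(dsbh(x)) is a full 64-bit byte swap, matching (DSHD (DSBH $rt)).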
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index b09c51179a..7167190f21 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -18,12 +18,12 @@
 #include "MipsInstrInfo.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -58,9 +58,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
-  MCInst TmpInst0;
-  MCInstLowering.Lower(MI, TmpInst0);
-  OutStreamer.EmitInstruction(TmpInst0);
+  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+  do {
+    MCInst TmpInst0;
+    MCInstLowering.Lower(I++, TmpInst0);
+    OutStreamer.EmitInstruction(TmpInst0);
+  } while ((I != E) && I->isInsideBundle());
 }
 
 //===----------------------------------------------------------------------===//
@@ -236,15 +241,6 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
     if (MipsFI->getEmitNOAT())
       OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
   }
-
-  if ((MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
-      Subtarget->isABI_O32() && MipsFI->globalBaseRegSet()) {
-    SmallVector<MCInst, 4> MCInsts;
-    MCInstLowering.LowerSETGP01(MCInsts);
-    for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
-         I != MCInsts.end(); ++I)
-      OutStreamer.EmitInstruction(*I);
-  }
 }
 
 /// EmitFunctionBodyEnd - Targets can override this to emit stuff after
@@ -316,7 +312,8 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     const MachineOperand &MO = MI->getOperand(OpNum);
     switch (ExtraCode[0]) {
     default:
-      return true;  // Unknown modifier.
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
     case 'X': // hex const int
       if ((MO.getType()) != MachineOperand::MO_Immediate)
         return true;
@@ -337,6 +334,17 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
         return true;
       O << MO.getImm() - 1;
       return false;
+    case 'z': {
+      // $0 if zero, regular printing otherwise
+      if (MO.getType() != MachineOperand::MO_Immediate)
+        return true;
+      int64_t Val = MO.getImm();
+      if (Val)
+        O << Val;
+      else
+        O << "$0";
+      return false;
+    }
     }
   }
 
@@ -349,11 +357,12 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+    return true; // Unknown modifier.
 
   const MachineOperand &MO = MI->getOperand(OpNum);
   assert(MO.isReg() && "unexpected inline asm memory operand");
   O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+
   return false;
 }
 
@@ -401,7 +410,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       break;
 
     case MachineOperand::MO_BlockAddress: {
-      MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+      MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
       O << BA->getName();
       break;
     }
@@ -462,7 +471,7 @@ printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
 
 void MipsAsmPrinter::
 printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                 const char *Modifier) {
-  const MachineOperand& MO = MI->getOperand(opNum);
+  const MachineOperand &MO = MI->getOperand(opNum);
   O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
 }
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 4b7e1d3766..8aadefdcd1 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,58 @@ def RetCC_MipsEABI : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
+// Mips FastCC Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_MipsO32_FastCC : CallingConv<[
+  // f64 arguments are passed in double-precision floating point registers.
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+
+  // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
+  CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_MipsN_FastCC : CallingConv<[
+  // Integer arguments are passed in integer registers.
+  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64,
+                                 T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+                                 T8_64, V1_64]>>,
+
+  // f64 arguments are passed in double-precision floating point registers.
+  CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64,
+                                 D6_64, D7_64, D8_64, D9_64, D10_64, D11_64,
+                                 D12_64, D13_64, D14_64, D15_64, D16_64,
+                                 D17_64, D18_64, D19_64]>>,
+
+  // Stack parameter slots for i64 and f64 are 64-bit doublewords and
+  // 8-byte aligned.
+  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_Mips_FastCC : CallingConv<[
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Integer arguments are passed in integer registers. All scratch registers,
+  // except for AT, V0 and T9, are available to be used as argument registers.
+  CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6,
+                                 T7, T8, V1]>>,
+
+  // f32 arguments are passed in single-precision floating point registers.
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+                                 F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+
+  // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+  CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>,
+  CCDelegateTo<CC_MipsN_FastCC>
+]>;
+
+//===----------------------------------------------------------------------===//
 // Mips Calling Convention Dispatch
 //===----------------------------------------------------------------------===//
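CC_Mips_FastCC above only applies to calls explicitly made with the fast calling convention; nothing opts in automatically, and the callee and every call site must agree. A minimal sketch using the C++ API of this era (pre-llvm/IR header layout):

    #include "llvm/CallingConv.h"
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"

    // Mark both sides of a call as fastcc so the new convention is used.
    static void useFastCC(llvm::Function &F, llvm::CallInst &Call) {
      F.setCallingConv(llvm::CallingConv::Fast);
      Call.setCallingConv(llvm::CallingConv::Fast);
    }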
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 7d819026da..c0e76399fb 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -145,8 +145,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
     for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
          MBB != E; ++MBB){
       MCE.StartMachineBasicBlock(MBB);
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-           I != E; ++I)
+      for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+           E = MBB->instr_end(); I != E; ++I)
         emitInstruction(*I);
     }
   } while (MCE.finishFunction(MF));
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 8b1215ab1e..b12b1f2b5a 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -61,47 +61,54 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
                      Instruction MOVZInst, Instruction SLTOp,
                      Instruction SLTuOp, Instruction SLTiOp,
                      Instruction SLTiuOp> {
-  def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
-  def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
-  def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+  def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<
+    (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+    (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<
+    (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+    (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+  def : MipsPat<
+    (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+    (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+  def : MipsPat<
+    (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+    (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+  def : MipsPat<
+    (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+    (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
 }
 
 multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
                      Instruction MOVZInst, Instruction XOROp> {
-  def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+  def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
 }
 
 multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC,
                      Instruction MOVZInst, Instruction XORiOp> {
-  def : Pat<(select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
+  def : MipsPat<
+    (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
     (MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>;
 }
 
 multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
                     Instruction XOROp> {
-  def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
-  def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+  def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+  def : MipsPat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
 }
 
 // Instantiation of instructions.
 def MOVZ_I_I     : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
 
-let Predicates = [HasMips64, HasStandardEncoding],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
   def MOVZ_I_I64   : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
   def MOVZ_I64_I   : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
     let isCodeGenOnly = 1;
@@ -139,7 +146,8 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in {
   def MOVZ_I_D32   : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
   def MOVN_I_D32   : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
 }
-let Predicates = [IsFP64bit, HasStandardEncoding],DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
   def MOVZ_I_D64   : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
   def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
     let isCodeGenOnly = 1;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index a8c4b05ecd..2bba8a3802 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -45,10 +45,12 @@ static cl::opt<bool> SkipDelaySlotFiller(
 
 namespace {
   struct Filler : public MachineFunctionPass {
+    typedef MachineBasicBlock::instr_iterator InstrIter;
+    typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
 
     TargetMachine &TM;
     const TargetInstrInfo *TII;
-    MachineBasicBlock::iterator LastFiller;
+    InstrIter LastFiller;
 
     static char ID;
     Filler(TargetMachine &tm)
@@ -71,27 +73,27 @@ namespace {
     }
 
     bool isDelayFiller(MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator candidate);
+                       InstrIter candidate);
 
-    void insertCallUses(MachineBasicBlock::iterator MI,
-                        SmallSet<unsigned, 32>& RegDefs,
-                        SmallSet<unsigned, 32>& RegUses);
+    void insertCallUses(InstrIter MI,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
 
-    void insertDefsUses(MachineBasicBlock::iterator MI,
-                        SmallSet<unsigned, 32>& RegDefs,
-                        SmallSet<unsigned, 32>& RegUses);
+    void insertDefsUses(InstrIter MI,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
 
-    bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+    bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
                     unsigned Reg);
 
-    bool delayHasHazard(MachineBasicBlock::iterator candidate,
+    bool delayHasHazard(InstrIter candidate,
                         bool &sawLoad, bool &sawStore,
                         SmallSet<unsigned, 32> &RegDefs,
                         SmallSet<unsigned, 32> &RegUses);
 
     bool
-    findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
-                   MachineBasicBlock::iterator &Filler);
+    findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
+                   InstrIter &Filler);
 
   };
@@ -103,14 +105,14 @@ namespace {
   bool Filler::
   runOnMachineBasicBlock(MachineBasicBlock &MBB) {
     bool Changed = false;
-    LastFiller = MBB.end();
+    LastFiller = MBB.instr_end();
 
-    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+    for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I)
       if (I->hasDelaySlot()) {
         ++FilledSlots;
         Changed = true;
 
-        MachineBasicBlock::iterator D;
+        InstrIter D;
 
         if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
           MBB.splice(llvm::next(I), &MBB, D);
@@ -121,6 +123,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
         // Record the filler instruction that filled the delay slot.
         // The instruction after it will be visited in the next iteration.
         LastFiller = ++I;
+
+        // Set InsideBundle bit so that the machine verifier doesn't expect this
+        // instruction to be a terminator.
+        LastFiller->setIsInsideBundle();
      }
 
     return Changed;
@@ -133,8 +139,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
 }
 
 bool Filler::findDelayInstr(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator slot,
-                            MachineBasicBlock::iterator &Filler) {
+                            InstrIter slot,
+                            InstrIter &Filler) {
   SmallSet<unsigned, 32> RegDefs;
   SmallSet<unsigned, 32> RegUses;
 
@@ -143,13 +149,13 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
   bool sawLoad = false;
   bool sawStore = false;
 
-  for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) {
+  for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) {
     // skip debug value
     if (I->isDebugValue())
      continue;
 
     // Convert to forward iterator.
-    MachineBasicBlock::iterator FI(llvm::next(I).base());
+    InstrIter FI(llvm::next(I).base());
 
     if (I->hasUnmodeledSideEffects()
         || I->isInlineAsm()
@@ -175,7 +181,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
   return false;
 }
 
-bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+bool Filler::delayHasHazard(InstrIter candidate,
                             bool &sawLoad, bool &sawStore,
                             SmallSet<unsigned, 32> &RegDefs,
                             SmallSet<unsigned, 32> &RegUses) {
@@ -223,9 +229,9 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
 }
 
 // Insert Defs and Uses of MI into the sets RegDefs and RegUses.
-void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
-                            SmallSet<unsigned, 32>& RegDefs,
-                            SmallSet<unsigned, 32>& RegUses) {
+void Filler::insertDefsUses(InstrIter MI,
+                            SmallSet<unsigned, 32> &RegDefs,
+                            SmallSet<unsigned, 32> &RegUses) {
   // If MI is a call or return, just examine the explicit non-variadic operands.
   MCInstrDesc MCID = MI->getDesc();
   unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
@@ -250,7 +256,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
 }
 
 //returns true if the Reg or its alias is in the RegSet.
-bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
+bool Filler::IsRegInSet(SmallSet<unsigned, 32> &RegSet, unsigned Reg) {
   // Check Reg and all aliased Registers.
   for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true);
        AI.isValid(); ++AI)
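The iterator changes above are what let the filler bundle a branch with its filler instruction; the selection logic itself is unchanged. Its core register rule can be modeled compactly (a simplification of delayHasHazard that ignores loads, stores, and side effects):

    #include <set>

    // RegDefs/RegUses: registers defined/used between the candidate and the
    // delay slot; a candidate may not collide with them.
    static bool hasRegisterHazard(const std::set<unsigned> &RegDefs,
                                  const std::set<unsigned> &RegUses,
                                  const std::set<unsigned> &CandDefs,
                                  const std::set<unsigned> &CandUses) {
      for (std::set<unsigned>::const_iterator I = CandDefs.begin(),
           E = CandDefs.end(); I != E; ++I)
        if (RegDefs.count(*I) || RegUses.count(*I))
          return true;   // candidate writes something read or rewritten later
      for (std::set<unsigned>::const_iterator I = CandUses.begin(),
           E = CandUses.end(); I != E; ++I)
        if (RegDefs.count(*I))
          return true;   // candidate reads something redefined before the slot
      return false;
    }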
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 27609c13ea..5afd2fc576 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -94,38 +94,6 @@ bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
   return true;
 }
 
-// Build an instruction sequence to load an immediate that is too large to fit
-// in 16-bit and add the result to Reg.
-static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64,
-                           const MipsInstrInfo &TII, MachineBasicBlock& MBB,
-                           MachineBasicBlock::iterator II, DebugLoc DL) {
-  unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
-  unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu;
-  unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
-  unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
-  MipsAnalyzeImmediate AnalyzeImm;
-  const MipsAnalyzeImmediate::InstSeq &Seq =
-    AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */);
-  MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
-
-  // The first instruction can be a LUi, which is different from other
-  // instructions (ADDiu, ORI and SLL) in that it does not have a register
-  // operand.
-  if (Inst->Opc == LUi)
-    BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
-      .addImm(SignExtend64<16>(Inst->ImmOpnd));
-  else
-    BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
-      .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
-  // Build the remaining instructions in Seq.
-  for (++Inst; Inst != Seq.end(); ++Inst)
-    BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
-      .addImm(SignExtend64<16>(Inst->ImmOpnd));
-
-  BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
-}
-
 void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();
   MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -144,9 +112,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
 
   // First, compute final stack size.
   unsigned StackAlign = getStackAlignment();
-  uint64_t StackSize =
-    RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign) +
-    RoundUpToAlignment(MFI->getStackSize(), StackAlign);
+  uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign);
+
+  if (MipsFI->globalBaseRegSet())
+    StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign;
+  else
+    StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign);
 
   // Update stack size
   MFI->setStackSize(StackSize);
@@ -162,8 +133,12 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
   if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize)
     BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
   else { // Expand immediate that doesn't fit in 16-bit.
-    MipsFI->setEmitNOAT();
-    expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+    unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+    MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
+    Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
+                        0);
+    BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
   }
 
   // emit ".cfi_def_cfa_offset StackSize"
@@ -264,14 +239,20 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
   // Adjust stack.
   if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize)
     BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
-  else // Expand immediate that doesn't fit in 16-bit.
-    expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+  else { // Expand immediate that doesn't fit in 16-bit.
+    unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT;
+
+    MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
+    Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
+                        0);
+    BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
+  }
 }
 
 void MipsFrameLowering::
 processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                      RegScavenger *RS) const {
-  MachineRegisterInfo& MRI = MF.getRegInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
 
   // FIXME: remove this code if register allocator can correctly mark
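Mips::loadImmediate above rebuilds the oversized stack adjustment in $at before the final addu. For the common 32-bit case, the split has to compensate for addiu sign-extending its 16-bit operand; a small helper makes the arithmetic concrete (a sketch of the invariant, not the MipsAnalyzeImmediate algorithm):

    #include <cassert>
    #include <cstdint>

    // Split Imm so that (Hi << 16) + signext(Lo) == Imm, i.e. the pair is
    // materializable as: lui $at, Hi ; addiu $at, $at, Lo ; addu $sp, $sp, $at
    static void splitImmediate(int32_t Imm, int32_t &Hi, int16_t &Lo) {
      Lo = (int16_t)(Imm & 0xFFFF);               // sign-extended by addiu
      Hi = (int32_t)(((int64_t)Imm - Lo) >> 16);  // rounds up when Lo < 0
      assert(((int64_t)Hi << 16) + Lo == Imm && "split must reconstruct Imm");
    }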
It can be calculated anywhere + + // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // @@ -201,6 +219,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); + MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -274,7 +293,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // If Parent is an unaligned f32 load or store, select a (base + index) // floating point load/store instruction (luxc1 or suxc1). - const LSBaseSDNode* LS = 0; + const LSBaseSDNode *LS = 0; if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { EVT VT = LS->getMemoryVT(); @@ -335,17 +354,18 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1); - if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || - isa<GlobalAddressSDNode>(LoVal.getOperand(0))) { + SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { Base = Addr.getOperand(0); - Offset = LoVal.getOperand(0); + Offset = Opnd0; return true; } } // If an indexed floating point load/store can be emitted, return false. - if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/) return false; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 04d4743b35..bc0a616e33 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -304,6 +304,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::ADD); setMinFunctionAlignment(HasMips64 ? 3 : 2); @@ -312,6 +313,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); + + maxStoresPerMemcpy = 16; } bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { @@ -340,17 +343,17 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { +static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. 
- SDNode* ADDCNode = ADDENode->getOperand(2).getNode(); + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); if (ADDCNode->getOpcode() != ISD::ADDC) return false; SDValue MultHi = ADDENode->getOperand(0); SDValue MultLo = ADDCNode->getOperand(0); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -413,17 +416,17 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { +static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. - SDNode* SUBCNode = SUBENode->getOperand(2).getNode(); + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); if (SUBCNode->getOpcode() != ISD::SUBC) return false; SDValue MultHi = SUBENode->getOperand(1); SDValue MultLo = SUBCNode->getOperand(1); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -478,9 +481,9 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { return true; } -static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -491,9 +494,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -504,9 +507,9 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -581,7 +584,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) { // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. -static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { +static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; @@ -603,7 +606,7 @@ static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { } // Creates and returns a CMovFPT/F node. 
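The SelectMadd/SelectMsub matchers above rest on a carry-chain identity: an adde of the product's high half combined with an addc of its low half computes exactly a 64-bit multiply-accumulate, which is what a single MIPS madd performs. A standalone model of that identity (values illustrative, not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  int32_t A = 123456, B = -7890;
  uint32_t Lo0 = 0xFFFFFFF0u, Hi0 = 7; // initial (Hi, Lo) accumulator

  // Reference: one 64-bit multiply-accumulate, as madd computes it.
  uint64_t Acc = (uint64_t(Hi0) << 32) | Lo0;
  uint64_t Ref = Acc + uint64_t(int64_t(A) * int64_t(B));

  // DAG shape before the combine: mullo/mulhs halves joined by addc/adde.
  uint64_t Prod = uint64_t(int64_t(A) * int64_t(B));
  uint64_t SumLo = uint64_t(uint32_t(Prod)) + Lo0;                  // addc
  uint32_t Lo = uint32_t(SumLo);
  uint32_t Hi = uint32_t(Prod >> 32) + Hi0 + uint32_t(SumLo >> 32); // adde
  assert(((uint64_t(Hi) << 32) | Lo) == Ref);
  return 0;
}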
-static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, +static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { bool invert = InvertFPCondCode((Mips::CondCode) cast<ConstantSDNode>(Cond.getOperand(2)) @@ -613,9 +616,9 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -639,16 +642,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, const DebugLoc DL = N->getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); SDValue True = N->getOperand(1); - + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); - + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } -static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match EXT. // $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, size, pos @@ -686,9 +689,9 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize, MVT::i32)); } -static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match INS. 
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 @@ -740,6 +743,33 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0)); } +static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Add = N->getOperand(1); + + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue Lo = Add.getOperand(1); + + if ((Lo.getOpcode() != MipsISD::Lo) || + (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable)) + return SDValue(); + + EVT ValTy = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0), + Add.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -755,11 +785,13 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) case ISD::UDIVREM: return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: - return PerformSELECTCombine(N, DAG, DCI, Subtarget); + return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DAG, DCI, Subtarget); + case ISD::ADD: + return PerformADDCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -832,7 +864,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { /* static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, DebugLoc dl, - const MipsSubtarget* Subtarget, + const MipsSubtarget *Subtarget, const TargetInstrInfo *TII, bool isFPCmp, unsigned Opc) { // There is no need to expand CMov instructions if target has @@ -2053,7 +2085,7 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // TODO: set SType according to the desired memory barrier behavior. SDValue -MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { +MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { unsigned SType = 0; DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), @@ -2061,7 +2093,7 @@ MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { } SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. 
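PerformANDCombine and PerformORCombine above recognize the usual C bit-field idioms. A minimal sketch of source shapes that should match, with the expected MIPS32r2 instructions noted as unverified comments:

#include <cstdint>

// $dst = and (srl $src, 3), 31  =>  ext $dst, $src, 3, 5  (pos = 3, size = 5)
uint32_t extractField(uint32_t X) {
  return (X >> 3) & 0x1F;
}

// mask1 = 63 << 4, mask0 = ~mask1  =>  ins $dst, $src, 4, 6
uint32_t insertField(uint32_t Dst, uint32_t Src) {
  const uint32_t Mask1 = 0x3Fu << 4;
  return (Dst & ~Mask1) | ((Src << 4) & Mask1);
}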
unsigned SType = 0; @@ -2071,7 +2103,7 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, } SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); @@ -2093,15 +2125,15 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(0x20, MVT::i32)); - Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, DAG.getConstant(0, MVT::i32), - ShiftLeftLo); + Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, + DAG.getConstant(0, MVT::i32), ShiftLeftLo); Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, 2, DL); } -SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, +SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); @@ -2144,17 +2176,15 @@ SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { - SDValue BasePtr = LD->getBasePtr(), Ptr; + SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); - EVT BasePtrVT = BasePtr.getValueType(); + EVT BasePtrVT = Ptr.getValueType(); DebugLoc DL = LD->getDebugLoc(); SDVTList VTList = DAG.getVTList(VT, MVT::Other); if (Offset) - Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr, + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, BasePtrVT)); - else - Ptr = BasePtr; SDValue Ops[] = { Chain, Ptr, Src }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, @@ -2225,17 +2255,14 @@ SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { - SDValue BasePtr = SD->getBasePtr(), Ptr, Value = SD->getValue(); - EVT MemVT = SD->getMemoryVT(); - EVT BasePtrVT = BasePtr.getValueType(); + SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); + EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); DebugLoc DL = SD->getDebugLoc(); SDVTList VTList = DAG.getVTList(MVT::Other); if (Offset) - Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, BasePtr, + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, BasePtrVT)); - else - Ptr = BasePtr; SDValue Ops[] = { Chain, Value, Ptr }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, @@ -2472,10 +2499,10 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. 
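CreateLoadLR/CreateStoreLR above build the LWL/LWR (and SWL/SWR) pairs used for unaligned integer accesses. A portable model of what such a load computes, with a little-endian instruction sketch; the offset choice is an assumption, and big-endian swaps the pair:

#include <cstdint>
#include <cstring>

// Portable equivalent of the LWL/LWR pair; on little-endian MIPS roughly:
//   lwr $v0, 0($a0)
//   lwl $v0, 3($a0)
uint32_t loadUnaligned(const uint8_t *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof V); // byte-wise copy stands in for LWL + LWR
  return V;
}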
static void WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, - SmallVector<SDValue, 8>& MemOpChains, int& LastFI, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, int &LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { unsigned LocMemOffset = VA.getLocMemOffset(); unsigned Offset = 0; @@ -2563,10 +2590,10 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, // Copy Mips64 byVal arg to registers and stack. void static PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass, - SmallVector<SDValue, 8>& MemOpChains, int& LastFI, + SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, + SmallVector<SDValue, 8> &MemOpChains, int &LastFI, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { unsigned ByValSize = Flags.getByValSize(); unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); @@ -2679,7 +2706,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); else if (HasMips64) AnalyzeMips64CallOperands(CCInfo, Outs); @@ -2704,7 +2733,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. - if (IsO32) + if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); @@ -2958,7 +2987,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Mips); @@ -2977,9 +3006,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue>& OutChains, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, const Argument *FuncArg) { unsigned LocMem = VA.getLocMemOffset(); unsigned FirstWord = LocMem / 4; @@ -3004,8 +3033,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, // Create frame object on stack and copy registers used for byval passing to it. 
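The fastcc changes in LowerCall above skip both CC_MipsO32 and the O32 reserved argument area. A minimal model of the outgoing-area rule, as a sketch rather than the in-tree logic:

#include <algorithm>

// O32 reserves a 16-byte home area for $a0-$a3; the new fastcc path skips
// it (and uses CC_Mips_FastCC instead of CC_MipsO32).
unsigned outArgAreaSize(unsigned NextStackOffset, bool IsO32, bool IsFastCC) {
  if (IsO32 && !IsFastCC)
    return std::max(NextStackOffset, 16u);
  return NextStackOffset;
}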
static unsigned CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector<SDValue>& OutChains, SelectionDAG &DAG, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + std::vector<SDValue> &OutChains, SelectionDAG &DAG, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, EVT PtrTy, const Argument *FuncArg) { @@ -3064,7 +3093,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -3250,7 +3281,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); @@ -3398,6 +3429,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'r': if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) return std::make_pair(0U, &Mips::CPURegsRegClass); + if (VT == MVT::i64 && !HasMips64) + return std::make_pair(0U, &Mips::CPURegsRegClass); if (VT == MVT::i64 && HasMips64) return std::make_pair(0U, &Mips::CPU64RegsRegClass); // This will generate an error message @@ -3530,6 +3563,16 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } +EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const { + if (Subtarget->hasMips64()) + return MVT::i64; + + return MVT::i32; +} + bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 5342e37f28..b9975c550b 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -146,7 +146,8 @@ namespace llvm { SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, + bool IsSRA) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; @@ -202,6 +203,11 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
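The getOptimalMemOpType override and maxStoresPerMemcpy = 16 above work together: on Mips64 a memcpy is expanded with i64 operations, so copies of up to 16 * 8 = 128 bytes can stay inline. A usage sketch; the stated codegen effect is an expectation, not verified output:

#include <cstring>

struct Block { char Data[64]; };

// 64 bytes / 8-byte ops = 8 load/store pairs, under the 16-store limit, so
// the copy should be expanded inline on Mips64 rather than calling memcpy
// (alignment permitting).
void copyBlock(Block *Dst, const Block *Src) {
  std::memcpy(Dst, Src, sizeof(Block));
}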
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 29bd2dc494..c757b4c33f 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -54,10 +54,14 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in // Feature predicates. //===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, + AssemblerPredicate<"FeatureFP64Bit">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, + AssemblerPredicate<"!FeatureFP64Bit">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, + AssemblerPredicate<"FeatureSingleFloat">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, + AssemblerPredicate<"!FeatureSingleFloat">; // FP immediate patterns. def fpimm0 : PatLeaf<(fpimm), [{ @@ -428,46 +432,52 @@ def ExtractElementF64 : //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimm0), (MTC1 ZERO)>; -def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; +def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>; +def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; -def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; -def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; +def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; +def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; let Predicates = [NotFP64bit, HasStandardEncoding] in { - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; - def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; - def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D32_W (MTC1 CPURegs:$src))>; + def : MipsPat<(i32 (fp_to_sint AFGR64:$src)), + (MFC1 (TRUNC_W_D32 AFGR64:$src))>; + def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; } let Predicates = [IsFP64bit, HasStandardEncoding] in { - def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>; - def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; + def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>; + def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>; - def : Pat<(f32 (sint_to_fp CPU64Regs:$src)), - (CVT_S_L (DMTC1 CPU64Regs:$src))>; - def : Pat<(f64 (sint_to_fp CPU64Regs:$src)), - (CVT_D64_L (DMTC1 CPU64Regs:$src))>; + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D64_W (MTC1 CPURegs:$src))>; + def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)), + (CVT_S_L (DMTC1 CPU64Regs:$src))>; + def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)), + (CVT_D64_L (DMTC1 CPU64Regs:$src))>; - def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; - def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S 
FGR32:$src))>; - def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; + def : MipsPat<(i32 (fp_to_sint FGR64:$src)), + (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR64:$src)), + (DMFC1 (TRUNC_L_D64 FGR64:$src))>; - def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; + def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } // Patterns for unaligned floating point loads and stores. let Predicates = [HasMips32r2Or64, NotN64, NotNaCl/*@LOCALMOD*/] in { - def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : Pat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; + def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : MipsPat<(store_u FGR32:$src, CPURegs:$addr), + (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; } let Predicates = [IsN64, NotNaCl/*@LOCALMOD*/] in { - def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; + def : MipsPat<(f32 (load_u CPU64Regs:$addr)), + (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr), + (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index c01830d509..e4eefb9905 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" @@ -329,9 +330,9 @@ unsigned Mips::GetOppositeBranchOpc(unsigned Opc) } } -static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, +static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB, - SmallVectorImpl<MachineOperand>& Cond) { + SmallVectorImpl<MachineOperand> &Cond) { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -505,3 +506,58 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const return false; } +/// Return the number of bytes of code the specified instruction may be. +unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: + return MI->getDesc().getSize(); + case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + } +} + +unsigned +llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst) { + MipsAnalyzeImmediate AnalyzeImm; + unsigned Size = IsN64 ? 64 : 32; + unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; + unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = IsN64 ? 
Mips::AT_64 : Mips::AT; + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + + if (LastInst && (Seq.size() == 1)) { + *LastInst = *Inst; + return 0; + } + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. Skip the last instruction if + // LastInst is not 0. + for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInst) + *LastInst = *Inst; + + return Seq.size() - !!LastInst; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 51cc9afdfa..7a0065b634 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,6 +15,7 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -24,12 +25,6 @@ namespace llvm { -namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); -} - class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; @@ -109,8 +104,27 @@ public: /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + + /// Return the number of bytes of code the specified instruction may be. + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; }; +namespace Mips { + /// GetOppositeBranchOpc - Return the inverse of the specified + /// opcode, e.g. turning BEQ to BNE. + unsigned GetOppositeBranchOpc(unsigned Opc); + + /// Emit a series of instructions to load an immediate. All instructions + /// except for the last one are emitted. The function returns the number of + /// MachineInstrs generated. The opcode-immediate pair of the last + /// instruction is returned in LastInst, if it is not 0. 
+ unsigned + loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, MachineBasicBlock::iterator II, + DebugLoc DL, bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst); +} + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 60343293e8..5e388281b5 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -174,6 +174,10 @@ def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">, def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">; def NotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; +class MipsPat<dag pattern, dag result> : Pat<pattern, result> { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Instruction format superclass //===----------------------------------------------------------------------===// @@ -218,6 +222,7 @@ def mem : Operand<i32> { def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); + let EncoderMethod = "getMemEncoding"; } def mem_ea : Operand<i32> { @@ -563,6 +568,7 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, @@ -574,6 +580,7 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } // SetCC @@ -603,6 +610,7 @@ class JumpFJ<bits<6> op, string instr_asm>: let hasDelaySlot = 1; let Predicates = [RelocStatic, HasStandardEncoding]; let DecoderMethod = "DecodeJumpTarget"; + let Defs = [AT]; } // Unconditional branch @@ -616,6 +624,7 @@ class UncondBranch<bits<6> op, string instr_asm>: let isBarrier = 1; let hasDelaySlot = 1; let Predicates = [RelocPIC, HasStandardEncoding]; + let Defs = [AT]; } let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, @@ -1087,67 +1096,67 @@ def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// // Small immediates -def : Pat<(i32 immSExt16:$in), - (ADDiu ZERO, imm:$in)>; -def : Pat<(i32 immZExt16:$in), - (ORi ZERO, imm:$in)>; -def : Pat<(i32 immLow16Zero:$in), - (LUi (HI16 imm:$in))>; +def : MipsPat<(i32 immSExt16:$in), + (ADDiu ZERO, imm:$in)>; +def : MipsPat<(i32 immZExt16:$in), + (ORi ZERO, imm:$in)>; +def : MipsPat<(i32 immLow16Zero:$in), + (LUi (HI16 imm:$in))>; // Arbitrary immediates -def : Pat<(i32 imm:$imm), +def : MipsPat<(i32 imm:$imm), (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>; -// Carry patterns -def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs), - (SUBu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs), - (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$src, immSExt16:$imm), - (ADDiu CPURegs:$src, imm:$imm)>; +// Carry MipsPatterns +def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs), + (SUBu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), + (ADDu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), + (ADDiu CPURegs:$src, imm:$imm)>; // Call -def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), - (JAL tglobaladdr:$dst)>; -def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), - (JAL texternalsym:$dst)>; -//def : Pat<(MipsJmpLink CPURegs:$dst), -// (JALR CPURegs:$dst)>; +def : MipsPat<(MipsJmpLink (i32 
tglobaladdr:$dst)), + (JAL tglobaladdr:$dst)>; +def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), + (JAL texternalsym:$dst)>; +//def : MipsPat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs -def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; -def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; -def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; -def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; -def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; - -def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; -def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; -def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; -def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; -def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; - -def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), - (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), - (ADDiu CPURegs:$hi, tblockaddress:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), - (ADDiu CPURegs:$hi, tjumptable:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), - (ADDiu CPURegs:$hi, tconstpool:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), - (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; +def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; +def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; + +def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; +def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; +def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; +def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; +def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; + +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), + (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), + (ADDiu CPURegs:$hi, tblockaddress:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), + (ADDiu CPURegs:$hi, tjumptable:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), + (ADDiu CPURegs:$hi, tconstpool:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; // gp_rel relocs -def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), - (ADDiu CPURegs:$gp, tglobaladdr:$in)>; -def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), - (ADDiu CPURegs:$gp, tconstpool:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), + (ADDiu CPURegs:$gp, tglobaladdr:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), + (ADDiu CPURegs:$gp, tconstpool:$in)>; // wrapper_pic class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>: - Pat<(MipsWrapper RC:$gp, node:$in), - (ADDiuOp RC:$gp, node:$in)>; + MipsPat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; def : WrapperPat<tglobaladdr, ADDiu, CPURegs>; def : WrapperPat<tconstpool, ADDiu, CPURegs>; @@ -1157,58 +1166,58 @@ def : WrapperPat<tjumptable, ADDiu, CPURegs>; def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>; // Mips does not have "not", so we expand our way -def : Pat<(not CPURegs:$in), - (NOR CPURegs:$in, 
ZERO)>; +def : MipsPat<(not CPURegs:$in), + (NOR CPURegs:$in, ZERO)>; // extended loads let Predicates = [NotN64, HasStandardEncoding] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; } // peepholes let Predicates = [NotN64, HasStandardEncoding] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; + def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; } let Predicates = [IsN64, HasStandardEncoding] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; } // brcond patterns multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp, Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp, Instruction SLTiuOp, Register ZEROReg> { -def : Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), - (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), - (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; 
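The BrcondPats multiclass above exists because MIPS has no direct branch for most ordered comparisons; setge, for instance, becomes slt feeding beq against $zero. A small illustration, with register numbers in the comments purely illustrative:

void taken() {}

// setge has no single-branch form; BrcondPats lowers it as:
void branchGE(int A, int B) {
  if (A >= B)  //   slt $1, $4, $5      # $1 = (A < B)
    taken();   //   beq $1, $zero, ...  # taken when !(A < B)
}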
-def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond RC:$cond, bb:$dst), - (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; } defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; @@ -1216,39 +1225,39 @@ defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>; // setcc patterns multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp, Instruction SLTuOp, Register ZEROReg> { - def : Pat<(seteq RC:$lhs, RC:$rhs), - (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setne RC:$lhs, RC:$rhs), - (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; + def : MipsPat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; } multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setle RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; - def : Pat<(setule RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setle RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setule RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; } multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setgt RC:$lhs, RC:$rhs), - (SLTOp RC:$rhs, RC:$lhs)>; - def : Pat<(setugt RC:$lhs, RC:$rhs), - (SLTuOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; } multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> { - def : Pat<(setge RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; } multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp, Instruction SLTiuOp> { - def : Pat<(setge RC:$lhs, immSExt16:$rhs), - (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, immSExt16:$rhs), - (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; } defm : SeteqPats<CPURegs, SLTiu, XOR, SLTu, ZERO>; @@ -1258,10 +1267,10 @@ defm : SetgePats<CPURegs, SLT, SLTu>; defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // select MipsDynAlloc -def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; +def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; // bswap pattern -def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; +def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; //===----------------------------------------------------------------------===// // Floating Point Support diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 76ca3e1767..150bdbbe6f 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -154,8 +154,8 @@ 
TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { return Result; } -void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { +void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) { JCE.emitAlignment(4); void *Addr = (void*) (JCE.getCurrentPCValue()); if (!sys::Memory::setRangeWritable(Addr, 16)) @@ -193,7 +193,7 @@ void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { + unsigned NumRelocs, unsigned char *GOTBase) { for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index f4c4ae86d3..637a318660 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -45,8 +45,8 @@ class MipsJITInfo : public TargetJITInfo { /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); + virtual void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE); /// getLazyResolverFunction - Expose the lazy resolver to the JIT. virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); @@ -55,7 +55,7 @@ class MipsJITInfo : public TargetJITInfo { /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); + unsigned NumRelocs, unsigned char *GOTBase); /// Initialize - Initialize internal stage for the function being JITted. void Initialize(const MachineFunction &MF, bool isPIC) { diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp new file mode 100644 index 0000000000..7be353f190 --- /dev/null +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -0,0 +1,418 @@ +//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands a branch or jump instruction into a long branch if its +// offset is too large to fit into its immediate field. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-long-branch" + +#include "Mips.h" +#include "MipsTargetMachine.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +STATISTIC(LongBranches, "Number of long branches."); + +static cl::opt<bool> SkipLongBranch( + "skip-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Skip long branch pass."), + cl::Hidden); + +static cl::opt<bool> ForceLongBranch( + "force-mips-long-branch", + cl::init(false), + cl::desc("MIPS: Expand all branches to long format."), + cl::Hidden); + +namespace { + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; + + struct MBBInfo { + uint64_t Size; + bool HasLongBranch; + MachineInstr *Br; + + MBBInfo() : Size(0), HasLongBranch(false), Br(0) {} + }; + + class MipsLongBranch : public MachineFunctionPass { + + public: + static char ID; + MipsLongBranch(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {} + + virtual const char *getPassName() const { + return "Mips Long Branch"; + } + + bool runOnMachineFunction(MachineFunction &F); + + private: + void splitMBB(MachineBasicBlock *MBB); + void initMBBInfo(); + int64_t computeOffset(const MachineInstr *Br); + bool offsetFitsIntoField(const MachineInstr *Br); + unsigned addLongBranch(MachineBasicBlock &MBB, Iter Pos, + MachineBasicBlock *Tgt, DebugLoc DL, bool Nop); + void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, + MachineBasicBlock *MBBOpnd); + void expandToLongBranch(MBBInfo &Info); + + const TargetMachine &TM; + const MipsInstrInfo *TII; + MachineFunction *MF; + SmallVector<MBBInfo, 16> MBBInfos; + }; + + char MipsLongBranch::ID = 0; +} // end of anonymous namespace + +/// createMipsLongBranchPass - Returns a pass that converts branches to long +/// branches. +FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) { + return new MipsLongBranch(tm); +} + +/// Iterate over list of Br's operands and search for a MachineBasicBlock +/// operand. +static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) { + for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) { + const MachineOperand &MO = Br.getOperand(I); + + if (MO.isMBB()) + return MO.getMBB(); + } + + assert(false && "This instruction does not have an MBB operand."); + return 0; +} + +// Traverse the list of instructions backwards until a non-debug instruction is +// found or it reaches E. +static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) { + for (; B != E; ++B) + if (!B->isDebugValue()) + return B; + + return E; +} + +// Split MBB if it has two direct jumps/branches. +void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) { + ReverseIter End = MBB->rend(); + ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End); + + // Return if MBB has no branch instructions. 
+ if ((LastBr == End) || + (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch())) + return; + + ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End); + + // MBB has only one branch instruction if FirstBr is not a branch + // instruction. + if ((FirstBr == End) || + (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) + return; + + assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found."); + + // Create a new MBB. Move instructions in MBB to the newly created MBB. + MachineBasicBlock *NewMBB = + MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + + // Insert NewMBB and fix control flow. + MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); + NewMBB->transferSuccessors(MBB); + NewMBB->removeSuccessor(Tgt); + MBB->addSuccessor(NewMBB); + MBB->addSuccessor(Tgt); + MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + + NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end()); +} + +// Fill MBBInfos. +void MipsLongBranch::initMBBInfo() { + // Split the MBBs if they have two branches. Each basic block should have at + // most one branch after this loop is executed. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;) + splitMBB(I++); + + MF->RenumberBlocks(); + MBBInfos.clear(); + MBBInfos.resize(MF->size()); + + for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + + // Compute size of MBB. + for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin(); + MI != MBB->instr_end(); ++MI) + MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI); + + // Search for MBB's branch instruction. + ReverseIter End = MBB->rend(); + ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End); + + if ((Br != End) && !Br->isIndirectBranch() && + (Br->isConditionalBranch() || Br->isUnconditionalBranch())) + MBBInfos[I].Br = (++Br).base(); + } +} + +// Compute offset of branch in number of bytes. +int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { + int64_t Offset = 0; + int ThisMBB = Br->getParent()->getNumber(); + int TargetMBB = getTargetMBB(*Br)->getNumber(); + + // Compute offset of a forward branch. + if (ThisMBB < TargetMBB) { + for (int N = ThisMBB + 1; N < TargetMBB; ++N) + Offset += MBBInfos[N].Size; + + return Offset + 4; + } + + // Compute offset of a backward branch. + for (int N = ThisMBB; N >= TargetMBB; --N) + Offset += MBBInfos[N].Size; + + return -Offset + 4; +} + +// Insert the following sequence: +// (pic or N64) +// lw $at, global_reg_slot +// lw $at, got($L1)($at) +// addiu $at, $at, lo($L1) +// jr $at +// nop +// (static and !N64) +// lui $at, hi($L1) +// addiu $at, $at, lo($L1) +// jr $at +// nop +unsigned MipsLongBranch::addLongBranch(MachineBasicBlock &MBB, Iter Pos, + MachineBasicBlock *Tgt, DebugLoc DL, + bool Nop) { + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + unsigned ABI = TM.getSubtarget<MipsSubtarget>().getTargetABI(); + bool N64 = (ABI == MipsSubtarget::N64); + unsigned NumInstrs; + + if (IsPIC || N64) { + bool HasMips64 = TM.getSubtarget<MipsSubtarget>().hasMips64(); + unsigned AT = N64 ? Mips::AT_64 : Mips::AT; + unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; + unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; + unsigned JR = N64 ? Mips::JR64 : Mips::JR; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ?
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + const MipsRegisterInfo *MRI = + static_cast<const MipsRegisterInfo*>(TM.getRegisterInfo()); + unsigned SP = MRI->getFrameRegister(*MF); + unsigned GlobalRegFI = MF->getInfo<MipsFunctionInfo>()->getGlobalRegFI(); + int64_t Offset = MF->getFrameInfo()->getObjectOffset(GlobalRegFI); + + if (isInt<16>(Offset)) { + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(SP).addImm(Offset); + NumInstrs = 1; + } else { + unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MF->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + NumInstrs = Mips::loadImmediate(Offset, N64, *TII, MBB, Pos, DL, true, + &LastInst) + 2; + BuildMI(MBB, Pos, DL, TII->get(ADDu), AT).addReg(SP).addReg(AT); + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT) + .addImm(SignExtend64<16>(LastInst.ImmOpnd)); + } + + BuildMI(MBB, Pos, DL, TII->get(Load), AT).addReg(AT).addMBB(Tgt, GOTFlag); + BuildMI(MBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT).addMBB(Tgt, OFSTFlag); + BuildMI(MBB, Pos, DL, TII->get(JR)).addReg(Mips::AT, RegState::Kill); + NumInstrs += 3; + } else { + BuildMI(MBB, Pos, DL, TII->get(Mips::LUi), Mips::AT) + .addMBB(Tgt, MipsII::MO_ABS_HI); + BuildMI(MBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) + .addReg(Mips::AT).addMBB(Tgt, MipsII::MO_ABS_LO); + BuildMI(MBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT, RegState::Kill); + NumInstrs = 3; + } + + if (Nop) { + BuildMI(MBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); + ++NumInstrs; + } + + return NumInstrs; +} + +// Replace Br with a branch which has the opposite condition code and a +// MachineBasicBlock operand MBBOpnd. +void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, + DebugLoc DL, MachineBasicBlock *MBBOpnd) { + unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode()); + const MCInstrDesc &NewDesc = TII->get(NewOpc); + + MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); + + for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) { + MachineOperand &MO = Br->getOperand(I); + + if (!MO.isReg()) { + assert(MO.isMBB() && "MBB operand expected."); + break; + } + + MIB.addReg(MO.getReg()); + } + + MIB.addMBB(MBBOpnd); + + Br->eraseFromParent(); +} + +// Expand branch instructions to long branches. +void MipsLongBranch::expandToLongBranch(MBBInfo &I) { + I.HasLongBranch = true; + + MachineBasicBlock *MBB = I.Br->getParent(), *Tgt = getTargetMBB(*I.Br); + DebugLoc DL = I.Br->getDebugLoc(); + + if (I.Br->isUnconditionalBranch()) { + // Unconditional branch before transformation: + // b $tgt + // delay-slot-instr + // + // after transformation: + // delay-slot-instr + // lw $at, global_reg_slot + // lw $at, %got($tgt)($at) + // addiu $at, $at, %lo($tgt) + // jr $at + // nop + I.Size += (addLongBranch(*MBB, llvm::next(Iter(I.Br)), Tgt, DL, true) + - 1) * 4; + + // Remove branch and clear InsideBundle bit of the next instruction. 
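replaceBranch above flips a conditional branch with Mips::GetOppositeBranchOpc so the short branch can target the fall-through block while the long sequence handles the original target. A tiny standalone model of that inversion; only a few pairs of the real table are shown, and the enum is illustrative:

#include <cstdlib>

enum Opc { BEQ, BNE, BGTZ, BLEZ, BGEZ, BLTZ };

Opc opposite(Opc O) {
  switch (O) {
  case BEQ:  return BNE;
  case BNE:  return BEQ;
  case BGTZ: return BLEZ;
  case BLEZ: return BGTZ;
  case BGEZ: return BLTZ;
  case BLTZ: return BGEZ;
  }
  std::abort(); // unreachable for the opcodes modeled here
}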
+ llvm::next(MachineBasicBlock::instr_iterator(I.Br)) + ->setIsInsideBundle(false); + I.Br->eraseFromParent(); + return; + } + + assert(I.Br->isConditionalBranch() && "Conditional branch expected."); + + // Conditional branch before transformation: + // b cc, $tgt + // delay-slot-instr + // FallThrough: + // + // after transformation: + // b !cc, FallThrough + // delay-slot-instr + // NewMBB: + // lw $at, global_reg_slot + // lw $at, %got($tgt)($at) + // addiu $at, $at, %lo($tgt) + // jr $at + // nop + // FallThrough: + + MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + MBB->removeSuccessor(Tgt); + MBB->addSuccessor(NewMBB); + NewMBB->addSuccessor(Tgt); + + I.Size += addLongBranch(*NewMBB, NewMBB->begin(), Tgt, DL, true) * 4; + replaceBranch(*MBB, I.Br, DL, *MBB->succ_begin()); +} + +static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { + MachineBasicBlock &MBB = F.front(); + MachineBasicBlock::iterator I = MBB.begin(); + DebugLoc DL = MBB.findDebugLoc(MBB.begin()); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0) + .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + MBB.removeLiveIn(Mips::V0); +} + +bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { + if ((TM.getRelocationModel() == Reloc::PIC_) && + TM.getSubtarget<MipsSubtarget>().isABI_O32() && + F.getInfo<MipsFunctionInfo>()->globalBaseRegSet()) + emitGPDisp(F, TII); + + if (SkipLongBranch) + return true; + + MF = &F; + initMBBInfo(); + + bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); + SmallVector<MBBInfo, 16>::iterator I, E = MBBInfos.end(); + bool EverMadeChange = false, MadeChange = true; + + while (MadeChange) { + MadeChange = false; + + for (I = MBBInfos.begin(); I != E; ++I) { + // Skip if this MBB doesn't have a branch or the branch has already been + // converted to a long branch. + if (!I->Br || I->HasLongBranch) + continue; + + if (!ForceLongBranch) { + int64_t Offset = computeOffset(I->Br); + + // Check if offset fits into 16-bit immediate field of branches. + if ((I->Br->isConditionalBranch() || IsPIC) && isInt<16>(Offset / 4)) + continue; + + // Check if offset fits into 26-bit immediate field of jumps (J).
+ if (I->Br->isUnconditionalBranch() && !IsPIC && isInt<26>(Offset / 4)) + continue; + } + + expandToLongBranch(*I); + ++LongBranches; + EverMadeChange = MadeChange = true; + } + } + + if (EverMadeChange) + MF->RenumberBlocks(); + + return true; +} diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 0475777eac..ac33619c34 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -29,7 +29,7 @@ using namespace llvm; MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter) : AsmPrinter(asmprinter) {} -void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) { +void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) { Mang = M; Ctx = C; } @@ -105,21 +105,23 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, assert(Offset > 0); const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); - return MCOperand::CreateExpr(AddExpr); + const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + return MCOperand::CreateExpr(Add); } -static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0, - const MCOperand& Opnd1, - const MCOperand& Opnd2 = MCOperand()) { +/* +static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0, + const MCOperand &Opnd1, + const MCOperand &Opnd2 = MCOperand()) { Inst.setOpcode(Opc); Inst.addOperand(Opnd0); Inst.addOperand(Opnd1); if (Opnd2.isValid()) Inst.addOperand(Opnd2); } +*/ -MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, unsigned offset) const { MachineOperandType MOTy = MO.getType(); @@ -157,11 +159,6 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -// Create the following two instructions: -// "lui $2, %hi(_gp_disp)" -// "addiu $2, $2, %lo(_gp_disp)" -void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) { - MCOperand RegOpnd = MCOperand::CreateReg(Mips::V0); MCInst Instr4, Mask1, Mask2; // @LOCALMOD // @LOCALMOD-START MCOperand MaskReg = MCOperand::CreateReg(Mips::LoadStoreStackMaskReg); @@ -205,18 +202,4 @@ void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) { llvm_unreachable("unaligned instruction not sandboxed"); } } - // @LOCALMOD-END - StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); - const MCSymbolRefExpr *MCSym; - - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); - MCOperand SymHi = MCOperand::CreateExpr(MCSym); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); - MCOperand SymLo = MCOperand::CreateExpr(MCSym); - - MCInsts.resize(2); - - CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi); - CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo); -} + // @LOCALMOD-END
\ No newline at end of file diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 0f4944e423..314420a170 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -31,9 +31,8 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { MipsAsmPrinter &AsmPrinter; public: MipsMCInstLower(MipsAsmPrinter &asmprinter); - void Initialize(Mangler *mang, MCContext* C); + void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerSETGP01(SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 04cb3f22b2..b2232c6573 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,8 +14,11 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H +#include "MipsSubtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" #include <utility> namespace llvm { @@ -45,6 +48,7 @@ class MipsFunctionInfo : public MachineFunctionInfo { // OutArgFIRange: Range of indices of all frame objects created during call to // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; + int GlobalRegFI; mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; @@ -54,7 +58,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), DynAllocFI(0), + OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0), MaxCallFrameSize(0), EmitNOAT(false) {} @@ -73,6 +77,24 @@ public: OutArgFIRange.second = LastFI; } + bool isGlobalRegFI(int FI) const { + return GlobalRegFI && (FI == GlobalRegFI); + } + + int getGlobalRegFI() const { + return GlobalRegFI; + } + + int initGlobalRegFI() { + const TargetMachine &TM = MF.getTarget(); + unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4; + int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment(); + uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment); + + GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true); + return GlobalRegFI; + } + // The first call to this function creates a frame object for dynamically // allocated stack area. 
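// Aside: initGlobalRegFI above pins the global-register spill slot at the
// first stack-aligned offset past the outgoing-argument area. A sketch of the
// rounding it relies on (RoundUpToAlignment, llvm/Support/MathExtras.h):
#include <cstdint>
static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align; // least multiple of Align >= Value
}
// e.g. MaxCallFrameSize = 20 and StackAlignment = 8 place the slot at offset 24.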
int getDynAllocFI() const { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 203cd9031c..3572f7d4d4 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -16,9 +16,11 @@ #include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" #include "MipsSubtarget.h" #include "MipsMachineFunction.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Type.h" #include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" @@ -35,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/DebugInfo.h" #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" @@ -54,8 +55,7 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } /// Mips Callee Saved Registers const uint16_t* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const -{ +getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; else if (!Subtarget.hasMips64()) @@ -64,12 +64,11 @@ getCalleeSavedRegs(const MachineFunction *MF) const return CSR_N32_SaveList; assert(Subtarget.isABI_N64()); - return CSR_N64_SaveList; + return CSR_N64_SaveList; } const uint32_t* -MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const -{ +MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_RegMask; else if (!Subtarget.hasMips64()) @@ -78,7 +77,7 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const return CSR_N32_RegMask; assert(Subtarget.isABI_N64()); - return CSR_N64_RegMask; + return CSR_N64_RegMask; } BitVector MipsRegisterInfo:: @@ -212,7 +211,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // incoming argument, callee-saved register location or local variable. int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex)) + if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || + MipsFI->isGlobalRegFI(FrameIndex)) Offset = spOffset; else Offset = spOffset + (int64_t)stackSize; @@ -226,37 +226,17 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, if (!MI.isDebugValue() && !isInt<16>(Offset)) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - MipsAnalyzeImmediate AnalyzeImm; - unsigned Size = Subtarget.isABI_N64() ? 64 : 32; - unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + MipsAnalyzeImmediate::Inst LastInst(0, 0); MipsFI->setEmitNOAT(); - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq except for the last one. 
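// Aside: the open-coded sequence deleted here ends with an ADDiu whose 16-bit
// chunk the replacement code below folds straight into the memory operand
// through SignExtend64. A sketch of that helper's effect (LLVM MathExtras
// semantics):
#include <cstdint>
template <unsigned B> static int64_t signExtend64(uint64_t X) {
  return int64_t(X << (64 - B)) >> (64 - B); // replicate bit B-1 upward
}
// e.g. signExtend64<16>(0xFFFF) == -1 and signExtend64<16>(0x7FFF) == 32767.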
- for (++Inst; Inst != Seq.end() - 1; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); FrameReg = ATReg; - Offset = SignExtend64<16>(Inst->ImmOpnd); + Offset = SignExtend64<16>(LastInst.ImmOpnd); } MI.getOperand(i).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 6d3f83f506..f320baed64 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,7 +42,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 835ac6d05b..c5d6bf9811 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -61,8 +61,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, bool MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { + TargetSubtargetInfo::AntiDepBreakMode &Mode, + RegClassVector &CriticalPathRCs) const { Mode = TargetSubtargetInfo::ANTIDEP_NONE; CriticalPathRCs.clear(); CriticalPathRCs.push_back(hasMips64() ? diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 8b67572348..78e80148f2 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -116,7 +116,7 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - PM->add(createMipsISelDag(getMipsTargetMachine())); + addPass(createMipsISelDag(getMipsTargetMachine())); return false; } @@ -124,12 +124,18 @@ bool MipsPassConfig::addInstSelector() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. bool MipsPassConfig::addPreEmitPass() { - PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine())); + MipsTargetMachine &TM = getMipsTargetMachine(); + addPass(createMipsDelaySlotFillerPass(TM)); + + // NOTE: long branch has not been implemented for mips16. + if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) + addPass(createMipsLongBranchPass(TM)); + // @LOCALMOD-START if (getMipsSubtarget().isTargetNaCl()) { // This pass does all the heavy sfi lifting. 
- PM->add(createMipsNaClRewritePass()); + addPass(createMipsNaClRewritePass()); } // @LOCALMOD-END diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 80c00e80f1..5cbf057416 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -69,9 +69,7 @@ namespace llvm { // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE); - + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); }; /// MipsebTargetMachine - Mips32 big endian target machine. diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index a32a78ac83..7cb16b4dd8 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -27,6 +27,7 @@ set(NVPTXCodeGen_sources add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) +add_dependencies(LLVMNVPTXCodeGen intrinsics_gen) add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 696f459ce2..f2b96163f4 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" #include "NVPTXTargetMachine.h" #include "NVPTXRegisterInfo.h" -#include "NVPTXAsmPrinter.h" +#include "NVPTXUtilities.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTXNumRegisters.h" -#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/GlobalVariable.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -36,17 +37,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/DerivedTypes.h" -#include "NVPTXUtilities.h" #include "llvm/Support/TimeValue.h" -#include <sstream> #include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Path.h" #include "llvm/Assembly/Writer.h" #include "cl_common_defines.h" - - +#include <sstream> using namespace llvm; @@ -1914,7 +1911,9 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. 
+ default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'r': break; } diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 84c7232236..56b237252d 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" +#include "NVPTXLowerAggrCopies.h" #include "llvm/Constants.h" -#include "llvm/Module.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "NVPTXLowerAggrCopies.h" #include "llvm/Target/TargetData.h" -#include "llvm/LLVMContext.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 826b1dd34b..433f415a87 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -120,11 +120,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } bool NVPTXPassConfig::addInstSelector() { - PM->add(createLowerAggrCopies()); - PM->add(createSplitBBatBarPass()); - PM->add(createAllocaHoisting()); - PM->add(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); - PM->add(createVectorElementizePass(getNVPTXTargetMachine())); + addPass(createLowerAggrCopies()); + addPass(createSplitBBatBarPass()); + addPass(createAllocaHoisting()); + addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); + addPass(createVectorElementizePass(getNVPTXTargetMachine())); return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1d82e5c677..b3f9cace6b 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -48,12 +48,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // bool DisableVerify, MCContext *&OutCtx); public: - //virtual bool addPassesToEmitFile(PassManagerBase &PM, - // formatted_raw_ostream &Out, - // CodeGenFileType FileType, - // CodeGenOpt::Level OptLevel, - // bool DisableVerify = true) ; - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 7204926526..192d18d664 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -29,6 +29,8 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) +add_dependencies(LLVMPowerPCCodeGen intrinsics_gen) + add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 61d23ce06a..d175e3e79e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { - assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!"); unsigned 
Code = MI->getOperand(OpNo).getImm(); + if (!Modifier) { + unsigned CCReg = MI->getOperand(OpNo+1).getReg(); + unsigned RegNo; + switch (CCReg) { + default: llvm_unreachable("Unknown CR register"); + case PPC::CR0: RegNo = 0; break; + case PPC::CR1: RegNo = 1; break; + case PPC::CR2: RegNo = 2; break; + case PPC::CR3: RegNo = 3; break; + case PPC::CR4: RegNo = 4; break; + case PPC::CR5: RegNo = 5; break; + case PPC::CR6: RegNo = 6; break; + case PPC::CR7: RegNo = 7; break; + } + + // Print the CR bit number. The Code is ((BI << 5) | BO) for a + // BCC, but we must have the positive form here (BO == 12) + unsigned BI = Code >> 5; + assert((Code & 0xF) == 12 && + "BO in predicate bit must have the positive form"); + + unsigned Value = 4*RegNo + BI; + O << Value; + return; + } + if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { case PPC::PRED_ALWAYS: return; // Don't print anything for always. diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 73fd5342a1..8f1e211c3e 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -42,7 +42,7 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier); + raw_ostream &O, const char *Modifier = 0); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 96c46451be..b7f1688436 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -50,6 +50,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", + "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; @@ -66,8 +68,10 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>; +def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureBookE]>; +def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureBookE]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603]>; @@ -90,10 +94,11 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, Feature64Bit - /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, @@ -101,7 +106,7 @@ def : Processor<"pwr6", G5Itineraries, def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; def 
: Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index fb90600211..f76b89c803 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -22,8 +22,8 @@ #include "PPCSubtarget.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Assembly/Writer.h" @@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'c': // Don't print "$" before a global var name or constant. break; // PPC never has a prefix. case 'L': // Write second word of DImode reference. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5234da71a8..f50f9b5a33 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -32,6 +32,7 @@ #define DEBUG_TYPE "ctrloops" #include "PPC.h" #include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -82,13 +83,14 @@ namespace { /// getCanonicalInductionVariable - Check to see if the loop has a canonical /// induction variable. /// Should be defined in MachineLoop. Based upon version in class Loop. - MachineInstr *getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const; + void getCanonicalInductionVariable(MachineLoop *L, + SmallVector<MachineInstr *, 4> &IVars, + SmallVector<MachineInstr *, 4> &IOps) const; /// getTripCount - Return a loop-invariant LLVM register indicating the /// number of times the loop will be executed. If the trip-count cannot /// be determined, this return null. - CountValue *getTripCount(MachineLoop *L, bool &WordCmp, + CountValue *getTripCount(MachineLoop *L, SmallVector<MachineInstr *, 2> &OldInsts) const; /// isInductionOperation - Return true if the instruction matches the @@ -175,12 +177,12 @@ namespace { /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &WordCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPLWI) { - WordCmp = true; +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { + if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; return true; - } else if (MI->getOpcode() == PPC::CMPDI || MI->getOpcode() == PPC::CMPLDI) { - WordCmp = false; + } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + SignedCmp = false; return true; } @@ -227,26 +229,27 @@ bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { /// the machine. /// This method assumes that the IndVarSimplify pass has been run by 'opt'. 
/// -MachineInstr -*PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, - MachineInstr *&IOp) const { +void +PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L, + SmallVector<MachineInstr *, 4> &IVars, + SmallVector<MachineInstr *, 4> &IOps) const { MachineBasicBlock *TopMBB = L->getTopBlock(); MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(); assert(PI != TopMBB->pred_end() && "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; - if (PI == TopMBB->pred_end()) return 0; // dead loop + if (PI == TopMBB->pred_end()) return; // dead loop MachineBasicBlock *Incoming = *PI++; - if (PI != TopMBB->pred_end()) return 0; // multiple backedges? + if (PI != TopMBB->pred_end()) return; // multiple backedges? // make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) - return 0; + return; std::swap(Incoming, Backedge); } else if (!L->contains(Backedge)) - return 0; + return; // Loop over all of the PHI nodes, looking for a canonical induction variable: // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2". @@ -263,13 +266,13 @@ MachineInstr // Check if the definition is an induction operation. MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg()); if (isInductionOperation(DI, DefReg)) { - IOp = DI; - return MPhi; + IOps.push_back(DI); + IVars.push_back(MPhi); } } } } - return 0; + return; } /// getTripCount - Return a loop-invariant LLVM value indicating the @@ -283,66 +286,100 @@ MachineInstr /// /// Based upon getTripCount in LoopInfo. /// -CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, +CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, SmallVector<MachineInstr *, 2> &OldInsts) const { + MachineBasicBlock *LastMBB = L->getExitingBlock(); + // Don't generate a CTR loop if the loop has more than one exit. + if (LastMBB == 0) + return 0; + + MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); + if (LastI->getOpcode() != PPC::BCC) + return 0; + + // We need to make sure that this compare is defining the condition + // register actually used by the terminating branch. + + unsigned PredReg = LastI->getOperand(1).getReg(); + DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI); + + unsigned PredCond = LastI->getOperand(0).getImm(); + if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE) + return 0; + // Check that the loop has an induction variable. 
- MachineInstr *IOp; - MachineInstr *IV_Inst = getCanonicalInductionVariable(L, IOp); - if (IV_Inst == 0) return 0; - - // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', - // if Imm is 0, get the count from the PHI opnd - // if Imm is -M, than M is the count - // Otherwise, Imm is the count - MachineOperand *IV_Opnd; - const MachineOperand *InitialValue; - if (!L->contains(IV_Inst->getOperand(2).getMBB())) { - InitialValue = &IV_Inst->getOperand(1); - IV_Opnd = &IV_Inst->getOperand(3); - } else { - InitialValue = &IV_Inst->getOperand(3); - IV_Opnd = &IV_Inst->getOperand(1); - } + SmallVector<MachineInstr *, 4> IVars, IOps; + getCanonicalInductionVariable(L, IVars, IOps); + for (unsigned i = 0; i < IVars.size(); ++i) { + MachineInstr *IOp = IOps[i]; + MachineInstr *IV_Inst = IVars[i]; + + // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm', + // if Imm is 0, get the count from the PHI opnd + // if Imm is -M, then M is the count + // Otherwise, Imm is the count + MachineOperand *IV_Opnd; + const MachineOperand *InitialValue; + if (!L->contains(IV_Inst->getOperand(2).getMBB())) { + InitialValue = &IV_Inst->getOperand(1); + IV_Opnd = &IV_Inst->getOperand(3); + } else { + InitialValue = &IV_Inst->getOperand(3); + IV_Opnd = &IV_Inst->getOperand(1); + }

 - // Look for the cmp instruction to determine if we - // can get a useful trip count. The trip count can - // be either a register or an immediate. The location - // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { - MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, WordCmp)) { - OldInsts.push_back(MI); - OldInsts.push_back(IOp); - - const MachineOperand &MO = MI->getOperand(2); - assert(MO.isImm() && "IV Cmp Operand should be an immediate"); - int64_t ImmVal = MO.getImm(); - - const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); - assert(L->contains(IV_DefInstr->getParent()) && - "IV definition should occurs in loop"); - int64_t iv_value = IV_DefInstr->getOperand(2).getImm(); - - if (ImmVal == 0) { - // Make sure the induction variable changes by one on each iteration. - if (iv_value != 1 && iv_value != -1) { - return 0; - } - return new CountValue(InitialValue->getReg(), iv_value > 0); - } else { + DEBUG(dbgs() << "Considering:\n"); + DEBUG(dbgs() << " induction operation: " << *IOp); + DEBUG(dbgs() << " induction variable: " << *IV_Inst); + DEBUG(dbgs() << " initial value: " << *InitialValue << "\n"); + + // Look for the cmp instruction to determine if we + // can get a useful trip count. The trip count can + // be either a register or an immediate. The location + // of the value depends upon the type (reg or imm). 
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + bool SignedCmp; + MachineInstr *MI = IV_Opnd->getParent(); + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + MI->getOperand(0).getReg() == PredReg) { + + OldInsts.push_back(MI); + OldInsts.push_back(IOp); + + DEBUG(dbgs() << " compare: " << *MI); + + const MachineOperand &MO = MI->getOperand(2); + assert(MO.isImm() && "IV Cmp Operand should be an immediate"); + + int64_t ImmVal; + if (SignedCmp) + ImmVal = (short) MO.getImm(); + else + ImmVal = MO.getImm(); + + const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg()); + assert(L->contains(IV_DefInstr->getParent()) && + "IV definition should occur in loop"); + int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm(); + assert(InitialValue->isReg() && "Expecting register for init value"); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg()); - + unsigned InitialValueReg = InitialValue->getReg(); + + const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = DefInstr->getOperand(2).getImm(); + int64_t start = (short) DefInstr->getOperand(2).getImm(); const MachineInstr *DefInstr2 = MRI->getVRegDef(DefInstr->getOperand(0).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { - start |= DefInstr2->getOperand(1).getImm() << 16; + DEBUG(dbgs() << " initial constant: " << *DefInstr); + DEBUG(dbgs() << " initial constant: " << *DefInstr2); + start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16; + int64_t count = ImmVal - start; if ((count % iv_value) != 0) { return 0; @@ -351,12 +388,23 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, bool &WordCmp, } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { - int64_t count = ImmVal - DefInstr->getOperand(1).getImm(); + DEBUG(dbgs() << " initial constant: " << *DefInstr); + + int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } return new CountValue(count/iv_value); + } else if (iv_value == 1 || iv_value == -1) { + // We can't determine a constant starting value. + if (ImmVal == 0) { + return new CountValue(InitialValueReg, iv_value > 0); + } + // FIXME: handle non-zero end value. } + // FIXME: handle non-unit increments (we might not want to introduce division + // but we can handle some 2^n cases with shifts). + } } } @@ -524,10 +572,9 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { return Changed; } - bool WordCmp; SmallVector<MachineInstr *, 2> OldInsts; // Are we able to determine the trip count for the loop? - CountValue *TripCount = getTripCount(L, WordCmp, OldInsts); + CountValue *TripCount = getTripCount(L, OldInsts); if (TripCount == 0) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; @@ -575,14 +622,21 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>(); bool isPPC64 = Subtarget.isPPC64(); + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; + unsigned CountReg; if (TripCount->isReg()) { // Create a copy of the loop count register. 
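// Aside: a sketch of the constant trip-count arithmetic above, with the
// 16-bit immediates pushed through (short) exactly as the pass does
// (illustrative helper, not the pass itself):
static long tripCount(short Start, short End, long Step) {
  long Delta = long(End) - long(Start);
  if (Step == 0 || Delta % Step != 0)
    return -1;            // not expressible as a simple CTR count
  return Delta / Step;    // e.g. Start = -2, End = 10, Step = 3 -> 4 trips
}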
- const TargetRegisterClass *RC = + const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); + unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + (unsigned) PPC::EXTSW_32_64 : + (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, - TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg()); + TII->get(CopyOp), CountReg).addReg(TripCount->getReg()); if (TripCount->isNeg()) { unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); @@ -590,26 +644,12 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG), CountReg).addReg(CountReg1); } - - // On a 64-bit system, if the original comparison was only 32-bit, then - // mask out the higher-order part of the count. - if (isPPC64 && WordCmp) { - unsigned CountReg1 = CountReg; - CountReg = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*Preheader, InsertPos, dl, - TII->get(PPC::RLDICL), CountReg).addReg(CountReg1 - ).addImm(0).addImm(32); - } } else { assert(TripCount->isImm() && "Expecting immedate vaule for trip count"); // Put the trip count in a register for transfer into the count register. - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; int64_t CountImm = TripCount->getImm(); - if (TripCount->isNeg()) - CountImm = -CountImm; + assert(!TripCount->isNeg() && "Constant trip count must be positive"); CountReg = MF->getRegInfo().createVirtualRegister(RC); if (CountImm > 0xFFFF) { @@ -665,6 +705,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget); // Conditional branch; just delete it. + DEBUG(dbgs() << "Removing old branch: " << *LastI); LastMBB->erase(LastI); delete TripCount; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index be172c2435..c24afa908d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -368,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) @@ -383,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } } else { // PPC64. 
@@ -401,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) .addReg(PPC::X0) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) @@ -416,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b777f9313c..a00f686adc 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -111,6 +111,23 @@ namespace { /// immediate field. Because preinc imms have already been validated, just /// accept it. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { + if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) { + Out = N; + return true; + } + + return false; + } + + /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc + /// index field. Because preinc imms have already been validated, just + /// accept it. + bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { + if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; + Out = N; return true; } @@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - // FIXME: PPC64 return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); } else { - llvm_unreachable("R+R preindex loads not supported yet!"); + unsigned Opcode; + bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; + if (LD->getValueType(0) != MVT::i64) { + // Handle PPC32 integer and normal FP loads. + assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::f64: Opcode = PPC::LFDUX; break; + case MVT::f32: Opcode = PPC::LFSUX; break; + case MVT::i32: Opcode = PPC::LWZUX; break; + case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX; break; + } + } else { + assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); + assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && + "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::i64: Opcode = PPC::LDUX; break; + case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; + case MVT::i16: Opcode = isSExt ? 
PPC::LHAUX8 : PPC::LHZUX8; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX8; break; + } + } + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = { Offset, Base, Chain }; + return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), + PPCLowering.getPointerTy(), + MVT::Other, Ops, 3); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 964d5a0d94..13250b33ea 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -66,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { + const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); setPow2DivIsCheap(); @@ -75,7 +76,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4); + bool isPPC64 = Subtarget->isPPC64(); + setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); @@ -132,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FMA , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FMA , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { + if (!Subtarget->hasFSQRT()) { setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); } @@ -228,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (Subtarget->isSVR4ABI()) { + if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); @@ -273,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { + if (Subtarget->has64BitSupport()) { // They also have instructions for converting between i64 and fp. 
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -292,7 +294,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } - if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) { + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or @@ -308,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { + if (Subtarget->hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -378,6 +380,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -391,7 +394,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + if (Subtarget->has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); @@ -400,7 +403,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); setExceptionSelectorRegister(PPC::X4); @@ -417,7 +420,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::BSWAP); // Darwin long double math library functions have $LDBL128 appended. - if (TM.getSubtarget<PPCSubtarget>().isDarwin()) { + if (Subtarget->isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); @@ -434,6 +437,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget->isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + setSupportJumpTables(false); + setInsertFencesForAtomic(true); setSchedulingPreference(Sched::Hybrid); @@ -1105,7 +1113,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - // TODO: Check reg+reg first. + if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } // LDU/STU use reg+imm*4, others use reg+imm. 
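// Aside: accepting reg+reg addressing for pre-increment lets strided loops
// fold the pointer bump into the access itself (lfsux, lwzux, stwux, ...).
// An illustrative source pattern that benefits:
float sumStrided(const float *P, long Stride, long N) {
  float Sum = 0.0f;
  for (long I = 0; I < N; ++I) {
    Sum += *P;      // a single lfsux can load *P and advance P
    P += Stride;
  }
  return Sum;
}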
if (VT != MVT::i64) { @@ -4933,11 +4944,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_CC_F4 || - MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { + if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8)) { + unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? + PPC::ISEL8 : PPC::ISEL; + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + // The SelectPred is ((BI << 5) | BO) for a BCC + unsigned BO = SelectPred & 0xF; + assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); + + unsigned TrueOpNo, FalseOpNo; + if (BO == 12) { + TrueOpNo = 2; + FalseOpNo = 3; + } else { + TrueOpNo = 3; + FalseOpNo = 2; + SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + } + + BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(TrueOpNo).getReg()) + .addReg(MI->getOperand(FalseOpNo).getReg()) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -5873,6 +5910,26 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than +/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to +/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd +/// is expanded to mul + add. +bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + return true; + default: + break; + } + + return false; +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref) return TargetLowering::getSchedulingPreference(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 973800b461..b0a013b4b4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -366,6 +366,12 @@ namespace llvm { bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. 
+ virtual bool isFMAFasterThanMulAndAdd(EVT VT) const; + private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 562ae7da0b..a2bd55f533 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -497,6 +497,10 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; +def ISEL8 : AForm_1<31, 15, + (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; } // End FXU Operations. @@ -533,6 +537,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp NoEncode<"$ea_result">; // NO LWAU! +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } // Zero extending loads. @@ -572,6 +586,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -607,6 +637,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "ldux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } def : Pat<(PPCload ixaddr:$src), @@ -680,10 +715,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, isPPC64; -let mayStore = 1 in -def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTD, - []>, isPPC64; + +def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + 
(pre_truncsti32 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 6c0f3d3f06..b0b8423281 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fsub V_immneg0, - (fsub (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB)))]>, - Requires<[FPContractions]>; + [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, + (fneg VRRC:$vB))))]>; def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 28b3bc1596..47f09dca77 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -79,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( return new PPCScoreboardHazardRecognizer(II, DAG); } + +// Detect 32 -> 64-bit extensions where we may reuse the low sub-register. 
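// Aside: the hook below is sound because sign extension leaves the low 32
// bits of the result equal to the source, so the source register can be
// folded into the destination's sub_32 sub-register. The invariant, as a
// sketch:
#include <cassert>
#include <cstdint>
static int64_t extsw(int32_t Src) {
  int64_t Dst = Src;                 // EXTSW: sign-extend 32 -> 64 bits
  assert(int32_t(Dst) == Src && "low 32 bits are unchanged");
  return Dst;
}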
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: return false; + case PPC::EXTSW: + case PPC::EXTSW_32_64: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SubIdx = PPC::sub_32; + return true; + } +} + unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7d49aa129e..374213ea43 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -92,6 +92,9 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e4af8846df..9b390461d8 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -323,7 +323,7 @@ def memri : Operand<iPTR> { } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc, ptr_rc); + let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; @@ -349,10 +349,10 @@ def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; +def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -711,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lfd $rD, $addr", LdStLFD, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + + +// Indexed (r+r) Loads with Update (preinc). 
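// Aside: all of the update-form defs below share one contract: the access
// address is Base + Index, and that same effective address is written back
// through $ea_result. In C terms (illustrative):
#include <cstdint>
#include <cstring>
static uint32_t lwzux(const uint8_t *Mem, uintptr_t &Base, uintptr_t Index) {
  uintptr_t EA = Base + Index;                  // effective address
  uint32_t Value;
  std::memcpy(&Value, Mem + EA, sizeof(Value)); // zero-extending word load
  Base = EA;                                    // update: base receives EA
  return Value;
}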
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfsux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfdux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -822,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; - -let mayStore = 1 in { -def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStStore, - []>; -} + +def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), + (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfsux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), + (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, @@ -1236,51 +1311,43 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma 
F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, + (fneg F8RC:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, + (fneg F4RC:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1331,6 +1398,13 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. + def ISEL : AForm_1<31, 15, + (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; +} + +let PPC970_Unit = 1 in { // FXU Operations. // M-Form instructions. rotate and mask instructions. // let isCommutable = 1 in { @@ -1441,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS GPRC:$in, tblockaddress:$g)>; -// Fused negative multiply subtract, alternate pattern -def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), - (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, - Requires<[FPContractions]>; -def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), - (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, - Requires<[FPContractions]>; - // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. 
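(The FMA pattern rewrites above are direct applications of the identities

    fmsub(a, c, b)  =   a*c - b   =  fma(a, c, -b)
    fnmadd(a, c, b) = -(a*c + b)  = -fma(a, c, b)
    fnmsub(a, c, b) = -(a*c - b)  = -fma(a, c, -b)

which is why the FPContractions predicate and the alternate FNMSUB patterns
can be dropped: matching the fma node means the decision to fuse was already
made when that node was formed.)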
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index ecb8754cbc..ab8bf1f93a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -199,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } +bool +PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + switch (RC->getID()) { + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return true; + default: + return false; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -328,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // address of new allocated space. if (LP64) { if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(Reg, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -349,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, .addImm(maxCallFrameSize) .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); } else { - BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(Reg, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 78e17c6890..152c36d699 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -47,6 +47,8 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// requiresRegisterScavenging - We require a register scavenger. /// FIXME (64-bit): Should be inlined. 
bool requiresRegisterScavenging(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c085ba26dd..bb193ac3d9 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -38,6 +38,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasAltivec(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasISEL(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 7d9be55713..0207c83393 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -70,6 +70,7 @@ protected: bool HasAltivec; bool HasFSQRT; bool HasSTFIWX; + bool HasISEL; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -141,6 +142,7 @@ public: bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } bool hasMFOCRF() const { return HasMFOCRF; } + bool hasISEL() const { return HasISEL; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index d7a808855b..980511268a 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -98,31 +98,25 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { bool PPCPassConfig::addPreRegAlloc() { if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - PM->add(createPPCCTRLoops()); + addPass(createPPCCTRLoops()); return false; } bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createPPCISelDag(getPPCTargetMachine())); + addPass(createPPCISelDag(getPPCTargetMachine())); return false; } bool PPCPassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. - PM->add(createPPCBranchSelectionPass()); + addPass(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - Options.DisableJumpTables = true; - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? Subtarget.SetJITMode(); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 093255e6af..cbfa4cf35b 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -964,6 +964,12 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// +unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } +The & 15 part should be optimized away, it doesn't change the result. Currently +not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". 
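+The mask is redundant because (x & 7) is at most 7, so ((x & 7) + 1) is at
+most 8, which already fits in the low four bits.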
+ +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index ae4af0f442..efb10db4c0 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -23,5 +23,7 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) +add_dependencies(LLVMSparcCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index c14b3d4a00..25548625e7 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -187,7 +187,9 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'r': break; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 77fd2af88d..9ee12ed7f5 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -60,7 +60,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { } bool SparcPassConfig::addInstSelector() { - PM->add(createSparcISelDag(getSparcTargetMachine())); + addPass(createSparcISelDag(getSparcTargetMachine())); return false; } @@ -68,8 +68,8 @@ bool SparcPassConfig::addInstSelector() { /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. bool SparcPassConfig::addPreEmitPass(){ - PM->add(createSparcFPMoverPass(getSparcTargetMachine())); - PM->add(createSparcDelaySlotFillerPass(getSparcTargetMachine())); + addPass(createSparcFPMoverPass(getSparcTargetMachine())); + addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index e785d330ae..ffc1d9f0d1 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/GlobalAlias.h" #include "llvm/GlobalValue.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Target/TargetMachine.h" @@ -90,26 +92,59 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +/// Get the IR-specified TLS model for Var. 
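+/// TLSModel's enumerators run from the most general to the most specific
+/// model (GeneralDynamic, LocalDynamic, InitialExec, LocalExec, in that
+/// order), so getTLSModel below can pick the stronger of two models with a
+/// simple integer comparison.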
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { + switch (Var->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: + llvm_unreachable("getSelectedTLSModel for non-TLS variable"); + break; + case GlobalVariable::GeneralDynamicTLSModel: + return TLSModel::GeneralDynamic; + case GlobalVariable::LocalDynamicTLSModel: + return TLSModel::LocalDynamic; + case GlobalVariable::InitialExecTLSModel: + return TLSModel::InitialExec; + case GlobalVariable::LocalExecTLSModel: + return TLSModel::LocalExec; + } + llvm_unreachable("invalid TLS model"); +} + TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + const GlobalVariable *Var = cast<GlobalVariable>(GV); + + bool isLocal = Var->hasLocalLinkage(); + bool isDeclaration = Var->isDeclaration(); + bool isPIC = getRelocationModel() == Reloc::PIC_; + bool isPIE = Options.PositionIndependentExecutable; // FIXME: what should we do for protected and internal visibility? // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); + bool isHidden = Var->hasHiddenVisibility(); - if (getRelocationModel() == Reloc::PIC_ && - !ForceTLSNonPIC && // @LOCALMOD - !Options.PositionIndependentExecutable) { + TLSModel::Model Model; + if (isPIC && !isPIE && + !ForceTLSNonPIC) { // @LOCALMOD if (isLocal || isHidden) - return TLSModel::LocalDynamic; + Model = TLSModel::LocalDynamic; else - return TLSModel::GeneralDynamic; + Model = TLSModel::GeneralDynamic; } else { if (!isDeclaration || isHidden) - return TLSModel::LocalExec; + Model = TLSModel::LocalExec; else - return TLSModel::InitialExec; + Model = TLSModel::InitialExec; } + + // If the user specified a more specific model, use that. + TLSModel::Model SelectedModel = getSelectedTLSModel(Var); + if (SelectedModel > Model) + return SelectedModel; + + return Model; } /// getOptLevel - Returns the optimization level: None, Less, @@ -143,4 +178,3 @@ void TargetMachine::setFunctionSections(bool V) { void TargetMachine::setDataSections(bool V) { DataSections = V; } - diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 08c732c388..417842b467 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -187,7 +187,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. 
SMLoc getEndLoc() const { return EndLoc; } - + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } virtual void print(raw_ostream &OS) const {} @@ -309,25 +309,25 @@ struct X86Operand : public MCParsedAsmOperand { } bool isMem() const { return Kind == Memory; } - bool isMem8() const { + bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); } - bool isMem16() const { + bool isMem16() const { return Kind == Memory && (!Mem.Size || Mem.Size == 16); } - bool isMem32() const { + bool isMem32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32); } - bool isMem64() const { + bool isMem64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64); } - bool isMem80() const { + bool isMem80() const { return Kind == Memory && (!Mem.Size || Mem.Size == 80); } - bool isMem128() const { + bool isMem128() const { return Kind == Memory && (!Mem.Size || Mem.Size == 128); } - bool isMem256() const { + bool isMem256() const { return Kind == Memory && (!Mem.Size || Mem.Size == 256); } @@ -356,26 +356,26 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem8Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem16Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem80Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem128Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem256Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -467,7 +467,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; - return Op.isMem() && + return Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && @@ -611,7 +611,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); - + if (getLexer().is(AsmToken::Identifier)) { // Parse BaseReg if (ParseRegister(BaseReg, Start, End)) { @@ -668,7 +668,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, End = Parser.getTok().getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) return 0; } } @@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we have both a base register and an index register make sure they are // both 64-bit or 32-bit registers. 
+ // To support VSIB, IndexReg can be 128-bit or 256-bit registers. if (BaseReg != 0 && IndexReg != 0) { if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && IndexReg != X86::RIZ) { Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); return 0; } if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && IndexReg != X86::EIZ){ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); return 0; @@ -944,7 +947,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if (PatchedName.startswith("set") && PatchedName.endswith("b") && PatchedName != "setb" && PatchedName != "setnb") PatchedName = PatchedName.substr(0, Name.size()-1); - + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && @@ -1217,7 +1220,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } } - + // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { @@ -1520,7 +1523,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_Success: // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the - // individual transformations can chain off each other. + // individual transformations can chain off each other. while (processInstruction(Inst, Operands)) ; @@ -1558,12 +1561,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Otherwise, we assume that this may be an integer instruction, which comes // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; - + // Check for the various suffix matches. Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; - + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); Tmp[Base.size()] = Suffixes[1]; Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); @@ -1691,19 +1694,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { const MCExpr *Value; if (getParser().ParseExpression(Value)) return true; - + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - + if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. 
if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); Parser.Lex(); } } - + Parser.Lex(); return false; } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 5b402da3ad..45fd42f205 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -53,6 +53,8 @@ endif() add_llvm_target(X86CodeGen ${sources}) +add_dependencies(LLVMX86CodeGen intrinsics_gen) + add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index b13a00620b..d58e36c803 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -498,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else { baseReg = MCOperand::CreateReg(0); } - + + // Check whether we are handling VSIB addressing mode for GATHER. + // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and + // we should use SIB_INDEX_XMM4|YMM4 for VSIB. + // I don't see a way to get the correct IndexReg in readSIB: + // We can tell whether it is VSIB or SIB after instruction ID is decoded, + // but instruction ID may not be decoded yet when calling readSIB. + uint32_t Opcode = mcInst.getOpcode(); + bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || + Opcode == X86::VGATHERDPDYrm || + Opcode == X86::VGATHERQPDrm || + Opcode == X86::VGATHERDPSrm || + Opcode == X86::VGATHERQPSrm || + Opcode == X86::VPGATHERDQrm || + Opcode == X86::VPGATHERDQYrm || + Opcode == X86::VPGATHERQQrm || + Opcode == X86::VPGATHERDDrm || + Opcode == X86::VPGATHERQDrm); + bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || + Opcode == X86::VGATHERDPSYrm || + Opcode == X86::VGATHERQPSYrm || + Opcode == X86::VPGATHERQQYrm || + Opcode == X86::VPGATHERDDYrm || + Opcode == X86::VPGATHERQDYrm); + if (IndexIs128 || IndexIs256) { + unsigned IndexOffset = insn.sibIndex - + (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); + SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; + insn.sibIndex = (SIBIndex)(IndexBase + + (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); + } + if (insn.sibIndex != SIB_INDEX_NONE) { switch (insn.sibIndex) { default: @@ -509,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, indexReg = MCOperand::CreateReg(X86::x); break; EA_BASES_32BIT EA_BASES_64BIT + REGS_XMM + REGS_YMM #undef ENTRY } } else { diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index fae309b45d..e2caf6a2a8 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -310,11 +310,14 @@ typedef enum { * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. + * Vector SIB: index can be XMM or YMM. */ typedef enum { SIB_INDEX_NONE, #define ENTRY(x) SIB_INDEX_##x, ALL_EA_BASES + REGS_XMM + REGS_YMM #undef ENTRY SIB_INDEX_max } SIBIndex; diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index c3f46ebda0..b0e5be3162 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -483,17 +483,17 @@ namespace X86II { // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. 
// - static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { + inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - static inline bool hasImm(uint64_t TSFlags) { + inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. - static inline unsigned getSizeOfImm(uint64_t TSFlags) { + inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8: @@ -508,7 +508,7 @@ namespace X86II { /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. - static inline unsigned isImmPCRel(uint64_t TSFlags) { + inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8PCRel: @@ -531,7 +531,7 @@ namespace X86II { /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only /// counted as one operand. /// - static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { + inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: // FIXME: Remove this form. @@ -594,7 +594,7 @@ namespace X86II { /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or /// higher) register? e.g. r8, xmm8, xmm13, etc. - static inline bool isX86_64ExtendedReg(unsigned RegNo) { + inline bool isX86_64ExtendedReg(unsigned RegNo) { switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: @@ -616,7 +616,7 @@ namespace X86II { return false; } - static inline bool isX86_64NonExtLowByteReg(unsigned reg) { + inline bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 61e2fdcb62..7f7873acd1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + // Instruction format for 4VOp3: + // src1(ModR/M), MemAddr, src3(VEX_4V) + // CurOp points to start of the MemoryOperand, + // it skips TIED_TO operands if exist, then increments past src1. + // CurOp + X86::AddrNumOperands will point to src3. 
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); break; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ee66e7ce1c..599c8f8c6d 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -20,10 +20,10 @@ #include "X86TargetMachine.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "llvm/CallingConv.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Type.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -436,7 +436,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'a': // This is an address. Currently only 'i' and 'r' are expected. if (MO.isImm()) { O << MO.getImm(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index af9efbd906..e263e44f40 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -52,7 +52,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit()); + MMI.callsUnwindInit() || MMI.callsEHReturn()); } static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { @@ -652,7 +652,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -916,18 +915,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, UseLEA, TII, *RegInfo); - // If we need a base pointer, set it up here. It's whatever the value - // of the stack pointer is at this point. Any variable size objects - // will be allocated after this, so we can still use the base pointer - // to reference locals. - if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. - unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; - BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) - .setMIFlag(MachineInstr::FrameSetup); - } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); @@ -1184,16 +1171,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); - if (RegInfo->hasBasePointer(MF)) { - assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); - if (FI < 0) { - // Skip the saved EBP. 
- return Offset + RegInfo->getSlotSize(); - } else { - assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); - return Offset + StackSize; - } - } else if (RegInfo->needsStackRealignment(MF)) { + if (RegInfo->needsStackRealignment(MF)) { if (FI < 0) { // Skip the saved EBP. return Offset + RegInfo->getSlotSize(); @@ -1224,14 +1202,9 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); // We can't calculate offset from frame pointer if the stack is realigned, - // so enforce usage of stack/base pointer. The base pointer is used when we - // have dynamic allocas in addition to dynamic realignment. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else if (RegInfo->needsStackRealignment(MF)) - FrameReg = RegInfo->getStackRegister(); - else - FrameReg = RegInfo->getFrameRegister(MF); + // so enforce usage of stack pointer. + FrameReg = (RegInfo->needsStackRealignment(MF)) ? + RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF); return getFrameIndexOffset(MF, FI); } @@ -1368,10 +1341,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, "Slot for EBP register must be last in order to be found!"); (void)FrameIdx; } - - // Spill the BasePtr if it's used. - if (RegInfo->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); } static bool diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c8ff1cf0d0..2871a790c6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -188,6 +188,7 @@ namespace { private: SDNode *Select(SDNode *N); + SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); @@ -2165,6 +2166,30 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } +/// SelectGather - Customized ISel for GATHER operations. 
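+/// Rearranges the intrinsic's operands into the usual five memory operands
+/// (base, scale, index, displacement, segment), bracketed by the
+/// pass-through source and the mask, and returns null when the scale
+/// operand is not an immediate.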
+/// +SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { + // Operands of Gather: VSrc, Base, VIdx, VMask, Scale + SDValue Chain = Node->getOperand(0); + SDValue VSrc = Node->getOperand(2); + SDValue Base = Node->getOperand(3); + SDValue VIdx = Node->getOperand(4); + SDValue VMask = Node->getOperand(5); + ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6)); + if (!Scale) + return 0; + + // Memory Operands: Base, Scale, Index, Disp, Segment + SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); + SDValue Segment = CurDAG->getRegister(0, MVT::i32); + const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, + Disp, Segment, VMask, Chain}; + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + VSrc.getValueType(), MVT::Other, + Ops, array_lengthof(Ops)); + return ResNode; +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; @@ -2180,23 +2205,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: break; + case Intrinsic::x86_avx2_gather_d_pd: + case Intrinsic::x86_avx2_gather_d_pd_256: + case Intrinsic::x86_avx2_gather_q_pd: + case Intrinsic::x86_avx2_gather_q_pd_256: + case Intrinsic::x86_avx2_gather_d_ps: + case Intrinsic::x86_avx2_gather_d_ps_256: + case Intrinsic::x86_avx2_gather_q_ps: + case Intrinsic::x86_avx2_gather_q_ps_256: + case Intrinsic::x86_avx2_gather_d_q: + case Intrinsic::x86_avx2_gather_d_q_256: + case Intrinsic::x86_avx2_gather_q_q: + case Intrinsic::x86_avx2_gather_q_q_256: + case Intrinsic::x86_avx2_gather_d_d: + case Intrinsic::x86_avx2_gather_d_d_256: + case Intrinsic::x86_avx2_gather_q_d: + case Intrinsic::x86_avx2_gather_q_d_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; + case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; + case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; + case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; + case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; + case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; + case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; + case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; + case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; + case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; + case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; + case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; + case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; + case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; + case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; + case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; + } + SDNode *RetVal = SelectGather(Node, Opc); + if (RetVal) + return RetVal; + break; + } + } + break; + } case X86ISD::GlobalBaseReg: return getGlobalBaseReg(); + case X86ISD::ATOMOR64_DAG: - return SelectAtomic64(Node, X86::ATOMOR6432); case X86ISD::ATOMXOR64_DAG: - return SelectAtomic64(Node, X86::ATOMXOR6432); case X86ISD::ATOMADD64_DAG: - return SelectAtomic64(Node, X86::ATOMADD6432); case 
X86ISD::ATOMSUB64_DAG: - return SelectAtomic64(Node, X86::ATOMSUB6432); case X86ISD::ATOMNAND64_DAG: - return SelectAtomic64(Node, X86::ATOMNAND6432); case X86ISD::ATOMAND64_DAG: - return SelectAtomic64(Node, X86::ATOMAND6432); - case X86ISD::ATOMSWAP64_DAG: - return SelectAtomic64(Node, X86::ATOMSWAP6432); + case X86ISD::ATOMSWAP64_DAG: { + unsigned Opc; + switch (Opcode) { + default: llvm_unreachable("Impossible intrinsic"); + case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; + case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; + case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; + case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; + case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; + case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; + } + SDNode *RetVal = SelectAtomic64(Node, Opc); + if (RetVal) + return RetVal; + break; + } case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bf559c98dd..4197c35adb 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -99,6 +99,10 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { + // Inserting UNDEF is Result + if (Vec.getOpcode() == ISD::UNDEF) + return Result; + EVT VT = Vec.getValueType(); assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); @@ -114,9 +118,8 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, * ElemsPerChunk); SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, - VecIdx); - return Result; + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, + VecIdx); } /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 @@ -136,10 +139,13 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { if (Subtarget->isTargetEnvMacho()) { if (is64Bit) - return new X8664_MachoTargetObjectFile(); + return new X86_64MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); } + if (Subtarget->isTargetLinux()) + return new X86LinuxTargetObjectFile(); + // @LOCALMOD-BEGIN if (Subtarget->isTargetNaCl()) return new TargetLoweringObjectFileNaCl(); @@ -3536,6 +3542,52 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { return true; } +// +// Some special combinations that can be optimized. +// +static +SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + EVT VT = SVOp->getValueType(0); + DebugLoc dl = SVOp->getDebugLoc(); + + if (VT != MVT::v8i32 && VT != MVT::v8f32) + return SDValue(); + + ArrayRef<int> Mask = SVOp->getMask(); + + // These are the special masks that may be optimized. 
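+  // Each mask interleaves the even (respectively odd) elements of the two
+  // sources; that can be done as an in-lane compaction of each source
+  // followed by a single unpack, which is cheaper than the generic
+  // two-source shuffle lowering.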
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14}; + static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15}; + bool MatchEvenMask = true; + bool MatchOddMask = true; + for (int i=0; i<8; ++i) { + if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i])) + MatchEvenMask = false; + if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) + MatchOddMask = false; + } + static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; + static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; + + const int *CompactionMask; + if (MatchEvenMask) + CompactionMask = CompactionMaskEven; + else if (MatchOddMask) + CompactionMask = CompactionMaskOdd; + else + return SDValue(); + + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); + + SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), + UndefNode, CompactionMask); + SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), + UndefNode, CompactionMask); + static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT, @@ -5041,8 +5093,16 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { SDValue Sc = Op.getOperand(0); if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && - Sc.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); + Sc.getOpcode() != ISD::BUILD_VECTOR) { + + if (!Subtarget->hasAVX2()) + return SDValue(); + + // Use the register form of the broadcast instruction available on AVX2. + if (VT.is256BitVector()) + Sc = Extract128BitVector(Sc, 0, DAG, dl); + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); + } Ld = Sc.getOperand(0); ConstSplatVal = (Ld.getOpcode() == ISD::Constant || @@ -6022,6 +6082,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, /// which could not be matched by any known target speficic shuffle static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + + SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG); + if (NewOp.getNode()) + return NewOp; + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); @@ -7504,11 +7569,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model = getTargetMachine().getTLSModel(GV); // @LOCALMOD-START @@ -9995,7 +10055,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); @@ -10015,7 +10074,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - MF.getRegInfo().addLiveOut(StoreAddrReg); return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, @@ -16240,12 +16298,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // wrong class. 
This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can // find, ignoring the required type. - if (VT == MVT::f32) + + if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; - else if (VT == MVT::f64) + else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; else if (X86::VR128RegClass.hasType(VT)) Res.second = &X86::VR128RegClass; + else if (X86::VR256RegClass.hasType(VT)) + Res.second = &X86::VR256RegClass; } return Res; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index fa1d67644d..aaef4a466d 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -55,11 +55,11 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } - - + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); - + if (BaseType == X86AddressMode::RegBase) MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, false, false, false, 0, false)); @@ -67,16 +67,16 @@ struct X86AddressMode { assert(BaseType == X86AddressMode::FrameIndexBase); MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); } - + MO.push_back(MachineOperand::CreateImm(Scale)); MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, false, false, false, 0, false)); - + if (GV) MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); else MO.push_back(MachineOperand::CreateImm(Disp)); - + MO.push_back(MachineOperand::CreateReg(0, false, false, false, false, false, 0, false)); } @@ -122,7 +122,7 @@ static inline const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else { @@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB, MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else MIB.addImm(AM.Disp); - + return MIB.addReg(0); } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index cbec891d7e..bebe5f033c 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -367,6 +367,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. +// SSDI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index dabb181cce..cb926f63a4 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -55,39 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load", enum { // Select which memory operand is being unfolded. - // (stored in bits 0 - 7) + // (stored in bits 0 - 3) TB_INDEX_0 = 0, TB_INDEX_1 = 1, TB_INDEX_2 = 2, TB_INDEX_3 = 3, - TB_INDEX_MASK = 0xff, - - // Minimum alignment required for load/store. - // Used for RegOp->MemOp conversion. 
- // (stored in bits 8 - 15) - TB_ALIGN_SHIFT = 8, - TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, - TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, - TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, - TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT, + TB_INDEX_MASK = 0xf, // Do not insert the reverse map (MemOp -> RegOp) into the table. // This may be needed because there is a many -> one mapping. - TB_NO_REVERSE = 1 << 16, + TB_NO_REVERSE = 1 << 4, // Do not insert the forward map (RegOp -> MemOp) into the table. // This is needed for Native Client, which prohibits branch // instructions from using a memory operand. - TB_NO_FORWARD = 1 << 17, + TB_NO_FORWARD = 1 << 5, - TB_FOLDED_LOAD = 1 << 18, - TB_FOLDED_STORE = 1 << 19 + TB_FOLDED_LOAD = 1 << 6, + TB_FOLDED_STORE = 1 << 7, + + // Minimum alignment required for load/store. + // Used for RegOp->MemOp conversion. + // (stored in bits 8 - 15) + TB_ALIGN_SHIFT = 8, + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT }; struct X86OpTblEntry { uint16_t RegOp; uint16_t MemOp; - uint32_t Flags; + uint16_t Flags; }; X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) @@ -415,14 +415,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, + { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 }, + { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 }, { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, @@ -499,14 +495,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, - { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 }, { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, + { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 }, + { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 }, { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE }, { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, @@ -815,17 +817,7 @@ 
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, - { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, - { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 }, - { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, - { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 }, - { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, - { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 }, - { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, - { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 }, - { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, - { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 }, { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5ae6b99e5a..4006dad684 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -145,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo { std::pair<unsigned, unsigned> > MemOp2RegOpTableType; MemOp2RegOpTableType MemOp2RegOpTable; - void AddTableEntry(RegOp2MemOpTableType &R2MTable, - MemOp2RegOpTableType &M2RTable, - unsigned RegOp, unsigned MemOp, unsigned Flags); + static void AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags); public: explicit X86InstrInfo(X86TargetMachine &tm); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 892115b77e..0edd10a355 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -333,6 +333,12 @@ def f128mem : X86MemOperand<"printf128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } def f256mem : X86MemOperand<"printf256mem">{ let ParserMatchClass = X86Mem256AsmOperand; } +def v128mem : X86MemOperand<"printf128mem"> { + let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); + let ParserMatchClass = X86Mem128AsmOperand; } +def v256mem : X86MemOperand<"printf256mem"> { + let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); + let ParserMatchClass = X86Mem256AsmOperand; } } // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 56542494b2..5319455dc5 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1418,10 +1418,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d, OpndItins itins> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], itins.rr, d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], itins.rm, d>; } @@ -1622,7 +1622,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; -let Pattern = []<dag> in { +let Pattern = []<dag>, neverHasSideEffects = 1 in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, 
f32mem, load, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; @@ -1630,14 +1630,16 @@ defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, TB, VEX, + Requires<[HasAVX]>; } -let Pattern = []<dag> in { +let Pattern = []<dag>, neverHasSideEffects = 1 in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, "cvtss2si{l}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS; @@ -1646,8 +1648,8 @@ defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, - TB; /* PD SSE3 form is avaiable */ + SSEPackedSingle, SSE_CVT_PS>, TB, + Requires<[HasSSE2]>; } let Predicates = [HasAVX] in { @@ -1788,56 +1790,6 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, Requires<[HasSSE2]>; } -// Convert doubleword to packed single/double fp -// SSE2 instructions without OpSize prefix -def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, Requires<[HasSSE2]>; - -// SSE2 instructions with XS prefix -def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, Requires<[HasSSE2]>; -def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, Requires<[HasSSE2]>; 
- - // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], @@ -1858,51 +1810,63 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PS_RM>; -def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, - VEX; -def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>; - -// SSE2 packed instructions with XD prefix -def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, VEX, Requires<[HasAVX]>; -def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, VEX, Requires<[HasAVX]>; -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, Requires<[HasSSE2]>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (VCVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (VCVTPS2DQrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), + (CVTPS2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), + (CVTPS2DQrm addr:$src)>; +} + +// Convert Packed Double FP to Packed DW Integers +let Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
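+// The explicit x and y mnemonics pin the source width (f128mem vs. f256mem),
+// which a bare vcvtpd2dq with a memory operand cannot convey.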
+def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTPD2DQrr VR128:$dst, VR128:$src)>; +def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", + (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; +} + +def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; +def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (VCVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (VCVTPD2DQXrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), + (CVTPD2DQrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), + (CVTPD2DQrm addr:$src)>; +} // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1914,7 +1878,7 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))], + (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", @@ -1935,14 +1899,19 @@ def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))], + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_VCVTDQ2PSrr VR128:$src)>; + (VCVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_VCVTDQ2PSrm addr:$src)>; + (VCVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -1962,9 +1931,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; + (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_CVTDQ2PSrm addr:$src)>; + (CVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (CVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (CVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>; @@ -1977,12 +1951,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins 
VR128:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -let isCodeGenOnly = 1 in -def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], @@ -1990,31 +1959,38 @@ def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + "cvttpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; + +let Predicates = [HasAVX] in { + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTTPD2DQYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2032,35 +2008,71 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; } + +let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, TB; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; +} + +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (VCVTPS2PDrr VR128:$src)>; +} -def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - 
(load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, Requires<[HasSSE2]>; +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), + (CVTPS2PDrr VR128:$src)>; +} + +// Convert Packed DW Integers to Packed Double FP +let Predicates = [HasAVX] in { +def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; +def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; + +// 128-bit register conversion intrinsics +let Predicates = [HasAVX] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (VCVTDQ2PDrr VR128:$src)>; + +let Predicates = [HasSSE2] in +def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), + (CVTDQ2PDrr VR128:$src)>; + +// AVX 256-bit register conversion intrinsics +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), + (VCVTDQ2PDYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), + (VCVTPD2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTPD2DQYrm addr:$src)>; + + def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (VCVTDQ2PDYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed double to packed single // The assembler can recognize rr 256-bit instructions by seeing a ymm @@ -2069,25 +2081,24 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>, VEX; -def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", + (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", + (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; + def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>; @@ -2096,64 +2107,60 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), IIC_SSE_CVT_PD_RM>; -def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; -def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), + (VCVTPD2PSXrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), + (CVTPD2PSrm addr:$src)>; +} // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. 
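The FIXME above names the refactor this patch applies throughout the file. A minimal before/after sketch of the idea, using a made-up int_x86_foo intrinsic and FOO opcode rather than anything defined here:

    // Before: a second, codegen-only def exists just to carry the intrinsic
    // pattern, so the same opcode and asm string are declared twice.
    def Int_FOOrr : I<0x00, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "foo\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_foo VR128:$src))]>;

    // After: one def with an empty pattern serves the assembler, and a
    // separate Pat<> routes the intrinsic to it during instruction selection.
    def FOOrr : I<0x00, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "foo\t{$src, $dst|$dst, $src}", []>;
    def : Pat<(int_x86_foo VR128:$src), (FOOrr VR128:$src)>;

With one def per encoding, the assembler, disassembler, and code generator can never disagree about an instruction's operands or flags.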
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), - (VCVTDQ2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), - (VCVTDQ2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - -// Match fround and fextend for 128/256-bit conversions -def : Pat<(v4f32 (fround (v4f64 VR256:$src))), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), - (VCVTPS2PDYrm addr:$src)>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), + (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), + (VCVTDQ2PSYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), + (VCVTPD2PSYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), + (VCVTPS2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTPS2DQYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), + (VCVTPS2PDYrr VR128:$src)>; + def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), + (VCVTPS2PDYrm addr:$src)>; + + def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTTPD2DQYrm addr:$src)>; + + // Match fround and fextend for 128/256-bit conversions + def : Pat<(v4f32 (fround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + + def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), + (VCVTPS2PDYrr VR128:$src)>; + def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + (VCVTPS2PDYrm addr:$src)>; +} //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions @@ -4889,80 +4896,6 @@ def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; //===---------------------------------------------------------------------===// -// SSE3 - Conversion Instructions -//===---------------------------------------------------------------------===// - -// Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX] in { -// The assembler can recognize rr 256-bit instructions by seeing a ymm -// register, but the same isn't true when using memory operands instead. -// Provide other assembly rr and rm forms to address this explicitly. 
-def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; - -// XMM only -def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; - -// YMM only -def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} - -def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; -def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; - -def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), - (VCVTTPD2DQYrm addr:$src)>; - -// Convert Packed DW Integers to Packed Double FP -let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -} - -def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; - -// AVX 256-bit register conversion intrinsics -def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - -def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (VCVTDQ2PDYrm addr:$src)>; - -//===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, @@ -7339,8 +7272,8 @@ let ExeDomain = SSEPackedSingle in { int_x86_avx2_vbroadcast_ss_ps_256>; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; +def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, + int_x86_avx2_vbroadcast_sd_pd_256>; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, @@ -7751,6 +7684,31 @@ let Predicates = 
[HasAVX2] in { def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VPBROADCASTQYrm addr:$src)>; + def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBrr VR128:$src)>; + def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBYrr VR128:$src)>; + def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWrr VR128:$src)>; + def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWYrr VR128:$src)>; + def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDrr VR128:$src)>; + def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDYrr VR128:$src)>; + def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQYrr VR128:$src)>; + def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSrr VR128:$src)>; + def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSYrr VR128:$src)>; + def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), + (VBROADCASTSDYrr VR128:$src)>; + // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. let AddedComplexity = 20 in { @@ -7761,7 +7719,7 @@ let Predicates = [HasAVX2] in { (VBROADCASTSSYrr (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), - (VBROADCASTSDrr + (VBROADCASTSDYrr (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), @@ -7771,7 +7729,7 @@ let Predicates = [HasAVX2] in { (VBROADCASTSSYrr (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VBROADCASTSDrr + (VBROADCASTSDYrr (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>; } } @@ -8061,3 +8019,55 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; + +//===----------------------------------------------------------------------===// +// VGATHER - GATHER Operations +multiclass avx2_gather<bits<8> opc, string OpcodeStr, + RegisterClass RC256, X86MemOperand memop256, + Intrinsic IntGather128, Intrinsic IntGather256> { + def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, v128mem:$src2, VR128:$mask), + !strconcat(OpcodeStr, + "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), + []>, VEX_4VOp3; + def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst), + (ins RC256:$src1, memop256:$src2, RC256:$mask), + !strconcat(OpcodeStr, + "\t{$src1, $src2, $mask|$mask, $src2, $src1}"), + []>, VEX_4VOp3, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", + VR256, v128mem, + int_x86_avx2_gather_d_pd, + int_x86_avx2_gather_d_pd_256>, VEX_W; + defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", + VR256, v256mem, + int_x86_avx2_gather_q_pd, + int_x86_avx2_gather_q_pd_256>, VEX_W; + defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", + VR256, v256mem, + int_x86_avx2_gather_d_ps, + int_x86_avx2_gather_d_ps_256>; + defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", + VR128, v256mem, + int_x86_avx2_gather_q_ps, + int_x86_avx2_gather_q_ps_256>; + defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", + 
VR256, v128mem, + int_x86_avx2_gather_d_q, + int_x86_avx2_gather_d_q_256>, VEX_W; + defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", + VR256, v256mem, + int_x86_avx2_gather_q_q, + int_x86_avx2_gather_q_q_256>, VEX_W; + defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", + VR256, v256mem, + int_x86_avx2_gather_d_d, + int_x86_avx2_gather_d_d_256>; + defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", + VR128, v256mem, + int_x86_avx2_gather_q_d, + int_x86_avx2_gather_q_d_256>; +} diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 6a8f0c8486..6d3548f093 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -17,17 +17,17 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1be4c3864a..ed086dd8ad 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,10 +50,6 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); -cl::opt<bool> -EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), - cl::desc("Enable use of a base pointer for complex stack frames")); - // @LOCALMOD-BEGIN extern cl::opt<bool> FlagUseZeroBasedSandbox; extern cl::opt<bool> FlagRestrictR15; @@ -77,12 +73,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; - BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; - BasePtr = X86::EBX; } } @@ -301,20 +295,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*I); } - // Set the base-pointer register and its aliases as reserved if needed. - if (hasBasePointer(MF)) { - CallingConv::ID CC = MF.getFunction()->getCallingConv(); - const uint32_t* RegMask = getCallPreservedMask(CC); - if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) - report_fatal_error( - "Stack realignment in presence of dynamic allocas is not supported with" - "this calling convention."); - - Reserved.set(getBaseRegister()); - for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I) - Reserved.set(*I); - } - // Mark the segment registers as reserved. 
Reserved.set(X86::CS); Reserved.set(X86::SS); @@ -384,35 +364,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - if (!EnableBasePointer) - return false; - - // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. - if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) - return true; - - return false; -} - bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - if (!MF.getTarget().Options.RealignStack) - return false; - - // Stack realignment requires a frame pointer. If we already started - // register allocation with frame pointer elimination, it is too late now. - if (!MRI->canReserveReg(FramePtr)) - return false; - - // If base pointer is necessary. Check that it isn't too late to reserve it. - if (MFI->hasVarSizedObjects()) - return MRI->canReserveReg(BasePtr); - return true; + return (MF.getTarget().Options.RealignStack && + !MFI->hasVarSizedObjects()); } bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { @@ -422,6 +377,13 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); + // FIXME: Currently we don't support stack realignment for functions with + // variable-sized allocas. + // FIXME: It's more complicated than this... + if (0 && requiresRealignment && MFI->hasVarSizedObjects()) + report_fatal_error( + "Stack realignment in presence of dynamic allocas is not supported"); + // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); @@ -561,9 +523,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; - if (hasBasePointer(MF)) - BasePtr = getBaseRegister(); - else if (needsStackRealignment(MF)) + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else if (AfterFPPop) BasePtr = StackPtr; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 1bc32cbb78..ee69842b10 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -50,11 +50,6 @@ private: /// unsigned FramePtr; - /// BasePtr - X86 physical register used as a base ptr in complex stack - /// frames. I.e., when we need a 3rd base, not just SP and FP, due to - /// variable size stack objects. - unsigned BasePtr; - public: X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); @@ -111,8 +106,6 @@ public: /// register scavenger to determine what registers are free. BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; @@ -130,7 +123,6 @@ public: // Debug information queries. 
unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } - unsigned getBaseRegister() const { return BasePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 6d05a91a32..20acc2bab3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -150,44 +150,44 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { bool X86PassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel())); + addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses. if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) - PM->add(createCleanupLocalDynamicTLSPass()); + addPass(createCleanupLocalDynamicTLSPass()); // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!getX86Subtarget().is64Bit()) - PM->add(createGlobalBaseRegPass()); + addPass(createGlobalBaseRegPass()); return false; } bool X86PassConfig::addPreRegAlloc() { - PM->add(createX86MaxStackAlignmentHeuristicPass()); + addPass(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } bool X86PassConfig::addPostRegAlloc() { - PM->add(createX86FloatingPointStackifierPass()); + addPass(createX86FloatingPointStackifierPass()); return true; // -print-machineinstr should print after this. } bool X86PassConfig::addPreEmitPass() { bool ShouldPrint = false; if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) { - PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + addPass(createExecutionDependencyFixPass(&X86::VR128RegClass)); ShouldPrint = true; } if (getX86Subtarget().hasAVX() && UseVZeroUpper) { - PM->add(createX86IssueVZeroUpperPass()); + addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } // @LOCALMOD-START if (getX86Subtarget().isTargetNaCl()) { - PM->add(createX86NaClRewritePass()); + addPass(createX86NaClRewritePass()); ShouldPrint = true; } // @LOCALMOD-END diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 32bfba96bb..4f39d68d40 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -10,17 +10,19 @@ #include "X86TargetObjectFile.h" #include "X86TargetMachine.h" #include "X86Subtarget.h" // @LOCALMOD +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" // @LOCALMOD +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/Mangler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" using namespace llvm; using namespace dwarf; -const MCExpr *X8664_MachoTargetObjectFile:: +const MCExpr *X86_64MachoTargetObjectFile:: getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { @@ -39,12 +41,18 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } -MCSymbol *X8664_MachoTargetObjectFile:: +MCSymbol *X86_64MachoTargetObjectFile:: getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return 
Mang->getSymbol(GV); } +void +X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} + // @LOCALMOD-START // NOTE: this was largely lifted from // lib/Target/ARM/ARMTargetObjectFile.cpp diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 34c1234eae..5fac48e57a 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -16,9 +16,9 @@ namespace llvm { - /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin + /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin /// x86-64. - class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { + class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: virtual const MCExpr * getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, @@ -32,6 +32,12 @@ namespace llvm { MachineModuleInfo *MMI) const; }; + /// X86LinuxTargetObjectFile - This implementation is used for linux x86 + /// and x86-64. + class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + // @LOCALMOD-BEGIN class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF { public: diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0d59572a0d..ca94f03a64 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -22,5 +22,7 @@ add_llvm_target(XCoreCodeGen XCoreSelectionDAGInfo.cpp ) +add_dependencies(LLVMXCoreCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 8906b2459e..c76866f47b 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -18,9 +18,9 @@ #include "XCoreSubtarget.h" #include "XCoreTargetMachine.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -260,7 +260,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { - printOperand(MI, OpNo, O); + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + if (ExtraCode[1] != 0) return true; // Unknown modifier. 
+ + if (ExtraCode && ExtraCode[0]) { + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + } + } + + printOperand(MI, OpNo, O); return false; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index b25a08d25c..b2f0603776 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -796,7 +796,7 @@ def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size), def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), "mkmsk $dst, $size", - [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>; + [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>; def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 5afd5a1aff..11ec86b0fa 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -55,7 +55,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { } bool XCorePassConfig::addInstSelector() { - PM->add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); + addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); return false; }
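A closing note on the XCoreInstrInfo.td hunk above: mkmsk dst, size materializes the low-bit mask (1 << size) - 1, and adding -1 is identical to adding 0xffffffff modulo 2^32. The rewrite presumably matters because the literal 0xffffffff does not fit a signed 32-bit pattern immediate, while -1 does. Restated as a standalone selection pattern (an illustration under that assumption, not a line of the patch):

    // size = 4 gives (1 << 4) - 1 = 0xF; size = 8 gives 0xFF, and so on.
    def : Pat<(add (shl 1, GRRegs:$size), -1),
              (MKMSK_2r GRRegs:$size)>;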