diff options
author | Derek Schuff <dschuff@chromium.org> | 2012-10-01 11:20:30 -0700 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2012-10-01 11:20:30 -0700 |
commit | b3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch) | |
tree | 0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib | |
parent | a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff) | |
parent | 72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff) |
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'
Conflicts:
lib/Target/ARM/ARMISelDAGToDAG.cpp
lib/Target/Mips/MipsISelLowering.cpp
lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib')
135 files changed, 6710 insertions, 782 deletions
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index acda34ba14..9a1ca63c1c 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -196,7 +196,7 @@ void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { // as volatile if they are live across a setjmp call, and they probably // won't do this in callers. exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->hasFnAttr(Attribute::ReturnsTwice); + !F->getFnAttributes().hasReturnsTwiceAttr(); // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 17631ddb30..dec0eced27 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -141,12 +141,13 @@ private: for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { CallSite CS(cast<Value>(II)); - if (CS && !isa<IntrinsicInst>(II)) { + if (CS) { const Function *Callee = CS.getCalledFunction(); - if (Callee) - Node->addCalledFunction(CS, getOrInsertFunction(Callee)); - else + if (!Callee) + // Indirect calls of intrinsics are not allowed so no need to check. 
Node->addCalledFunction(CS, CallsExternalNode); + else if (!Callee->isIntrinsic()) + Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } } } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 1a94665096..7ecc06bbb2 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -128,7 +128,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { public: CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), + AlwaysInline(F.getFnAttributes().hasAlwaysInlineAttr()), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), @@ -613,7 +613,7 @@ bool CallAnalyzer::visitStore(StoreInst &I) { bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && - !F.hasFnAttr(Attribute::ReturnsTwice)) { + !F.getFnAttributes().hasReturnsTwiceAttr()) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; @@ -1043,7 +1043,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, // something else. Don't inline functions marked noinline or call sites // marked noinline. if (!Callee || Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) + Callee->getFnAttributes().hasNoInlineAttr() || CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 83bdf5286a..7bd945733b 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -411,14 +411,50 @@ void Lint::visitMemoryReference(Instruction &I, "Undefined behavior: Branch to non-blockaddress", &I); } + // Check for buffer overflows and misalignment. 
if (TD) { - if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + Type *ATy = AI->getAllocatedType(); + if (!AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); + } + } - if (Align != 0) { - unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); - Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. 
+ if (Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 5736c3569d..9ce9f8c801 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -327,7 +327,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) { + LI->getParent()->getParent()->getFnAttributes().hasAddressSafetyAttr()){ // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index eedec8383a..66a8e17e11 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -919,23 +919,13 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { Attrs = Attribute::None; LocTy AttrLoc = Lex.getLoc(); + bool HaveError = false; while (1) { - switch (Lex.getKind()) { + lltok::Kind Token = Lex.getKind(); + switch (Token) { default: // End of attributes. - if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly)) - return Error(AttrLoc, "invalid use of function-only attribute"); - - // As a hack, we allow "align 2" on functions as a synonym for - // "alignstack 2". 
- if (AttrKind == 2 && - (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment))) - return Error(AttrLoc, "invalid use of attribute on a function"); - - if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly)) - return Error(AttrLoc, "invalid use of parameter-only attribute"); - - return false; + return HaveError; case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; case lltok::kw_signext: Attrs |= Attribute::SExt; break; case lltok::kw_inreg: Attrs |= Attribute::InReg; break; @@ -980,6 +970,51 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { } } + + // Perform some error checking. + switch (Token) { + default: + if (AttrKind == 2) + HaveError |= Error(AttrLoc, "invalid use of attribute on a function"); + break; + case lltok::kw_align: + // As a hack, we allow "align 2" on functions as a synonym for + // "alignstack 2". + break; + + // Parameter Only: + case lltok::kw_sret: + case lltok::kw_nocapture: + case lltok::kw_byval: + case lltok::kw_nest: + if (AttrKind != 0) + HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute"); + break; + + // Function Only: + case lltok::kw_noreturn: + case lltok::kw_nounwind: + case lltok::kw_readnone: + case lltok::kw_readonly: + case lltok::kw_noinline: + case lltok::kw_alwaysinline: + case lltok::kw_optsize: + case lltok::kw_ssp: + case lltok::kw_sspreq: + case lltok::kw_noredzone: + case lltok::kw_noimplicitfloat: + case lltok::kw_naked: + case lltok::kw_inlinehint: + case lltok::kw_alignstack: + case lltok::kw_uwtable: + case lltok::kw_nonlazybind: + case lltok::kw_returns_twice: + case lltok::kw_address_safety: + if (AttrKind != 2) + HaveError |= Error(AttrLoc, "invalid use of function-only attribute"); + break; + } + Lex.Lex(); } } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4a11223711..c3bffc5d63 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -477,7 +477,7 @@ bool 
BitcodeReader::ParseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Attributes ReconstitutedAttr = - Attribute::decodeLLVMAttributesForBitcode(Record[i+1]); + Attributes::decodeLLVMAttributesForBitcode(Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 94ebe190d4..b3f1bb13a9 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -177,7 +177,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { const AttributeWithIndex &PAWI = A.getSlot(i); Record.push_back(PAWI.Index); - Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); + Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs)); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 55aa4ee665..d506d7e507 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1140,6 +1140,11 @@ void AsmPrinter::EmitJumpTableInfo() { EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); + // Jump tables in code sections are marked with a data_region directive + // where that's supported. 
+ if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; @@ -1180,6 +1185,8 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); } + if (!JTInDiffSection) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } /// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index efe022b074..5494c0f784 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -590,7 +590,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 99233dfc2e..1009a1e29c 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -373,7 +373,7 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { /// bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) + if (F->getFnAttributes().hasOptimizeForSizeAttr()) return false; unsigned Align = TLI->getPrefLoopAlignment(); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index f4ebcd6fa4..c3bf2d234c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -427,7 +427,7 @@ void LiveInterval::join(LiveInterval &Other, // If we have to apply a mapping to our base interval assignment, rewrite it // now. 
- if (MustMapCurValNos) { + if (MustMapCurValNos && !empty()) { // Map the first live range. iterator OutIt = begin(); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b4ce9aa8c1..82710414b3 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -87,7 +87,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, // We can't remat physreg uses, unless it is a constant. if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) continue; return false; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 9a8cc48172..1f1ce671f5 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1013,7 +1013,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (F.getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 304e39e159..34fbfe20f4 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -59,13 +59,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, RegInfo = 0; MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); - if (Fn->hasFnAttr(Attribute::StackAlignment)) + if (Fn->getFnAttributes().hasStackAlignmentAttr()) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). 
getFnAttributes().getStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->hasFnAttr(Attribute::OptimizeForSize)) + if (!Fn->getFnAttributes().hasOptimizeForSizeAttr()) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 5ab56c09f5..a1c7e9f5fb 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -21,8 +21,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Out of line virtual method. -void MachineModuleInfoMachO::Anchor() {} -void MachineModuleInfoELF::Anchor() {} +void MachineModuleInfoMachO::anchor() {} +void MachineModuleInfoELF::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c791ffb28c..3a4125475e 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -96,7 +96,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasNakedAttr()) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,7 +111,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. 
- if (!F->hasFnAttr(Attribute::Naked)) + if (!F->getFnAttributes().hasNakedAttr()) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -221,7 +221,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // In Naked functions we aren't going to save any registers. - if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + if (Fn.getFunction()->getFnAttributes().hasNakedAttr()) return; std::vector<CalleeSavedInfo> CSI; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index dd0f548867..f45072f1ac 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -70,7 +70,7 @@ VerifyCoalescing("verify-coalescing", // Temporary option for testing new coalescer algo. static cl::opt<bool> -NewCoalescer("new-coalescer", cl::Hidden, +NewCoalescer("new-coalescer", cl::Hidden, cl::init(true), cl::desc("Use new coalescer algorithm")); namespace { @@ -1732,6 +1732,12 @@ void JoinVals::pruneValues(JoinVals &Other, case CR_Replace: // This value takes precedence over the value in Other.LI. LIS->pruneValue(&Other.LI, Def, &EndPoints); + // Remove <def,read-undef> flags. This def is now a partial redef. 
+ if (!Def.isBlock()) + for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); + MO.isValid(); ++MO) + if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) + MO->setIsUndef(false); DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def << ": " << Other.LI << '\n'); break; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0107ded953..d115991858 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2999,7 +2999,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); - else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), @@ -3217,11 +3217,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if ((LShVal + RShVal) != OpSizeInBits) return 0; - SDValue Rot; - if (HasROTL) - Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt); - else - Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. 
if (LHSMask.getNode() || RHSMask.getNode()) { @@ -3254,12 +3251,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTL) - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); } } } @@ -3271,25 +3264,21 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) { if (SUBC->getAPIntValue() == OpSizeInBits) { - if (HasROTR) - return DAG.getNode(ISD::ROTR, DL, VT, - LHSShiftArg, RHSShiftAmt).getNode(); - else - return DAG.getNode(ISD::ROTL, DL, VT, - LHSShiftArg, LHSShiftAmt).getNode(); + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? 
RHSShiftAmt : LHSShiftAmt).getNode(); } } } // Look for sign/zext/any-extended or truncate cases: - if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && - (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND - || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND - || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { SDValue LExtOp0 = LHSShiftAmt.getOperand(0); SDValue RExtOp0 = RHSShiftAmt.getOperand(0); if (RExtOp0.getOpcode() == ISD::SUB && @@ -4428,20 +4417,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); - if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, - N0.getOperand(0), N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); - } + if (SVT == MatchingVectorType) { + SDValue 
VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -5251,13 +5238,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // if the source is smaller than the dest, we still need an extend return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); - else if (N0.getOperand(0).getValueType().bitsGT(VT)) + if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); - else - // if the source and dest are the same type, we can drop both the extend - // and the truncate. - return N0.getOperand(0); + // if the source and dest are the same type, we can drop both the extend + // and the truncate. + return N0.getOperand(0); } // Fold extract-and-trunc into a narrow extract. For example: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bd33479b94..a48a6256e5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3521,7 +3521,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3614,7 +3614,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = 
MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3692,7 +3692,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4f6ff08407..65becbe44f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT); + EVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type @@ -99,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, + const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) - return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -125,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, if (RoundParts > 2) { Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, - PartVT, HalfVT); + PartVT, HalfVT, V); Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, - RoundParts / 2, PartVT, HalfVT); + RoundParts / 2, PartVT, HalfVT, V); } else { Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); @@ -143,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, DL, - Parts + RoundParts, OddParts, PartVT, OddVT); + Parts + RoundParts, OddParts, PartVT, OddVT, V); // Combine the round and odd parts. 
Lo = Val; @@ -172,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); } } @@ -210,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, llvm_unreachable("Unknown mismatch!"); } -/// getCopyFromParts - Create a value that contains the specified legal parts -/// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra -/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT -/// (ISD::AssertSext). +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT) { + EVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -243,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. 
@@ -252,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + PartVT, IntermediateVT, V); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -300,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); // Handle cases such as i8 -> <1 x i1> - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial scalar-to-vector conversions should get here!"); + if (ValueVT.getVectorNumElements() != 1) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("non-trivial scalar-to-vector conversion"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + report_fatal_error("Cannot handle scalar-to-vector conversion!"); + } if (ValueVT.getVectorNumElements() == 1 && ValueVT.getVectorElementType() != PartVT) { @@ -313,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } - - - static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT); + EVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, + EVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); @@ -383,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); + if (PartVT != ValueVT) { + LLVMContext &Ctx = *DAG.getContext(); + Twine ErrMsg("scalar-to-vector conversion failed"); + if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) { + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + ErrMsg = ErrMsg + ", possible invalid constraint for vector type"; + Ctx.emitError(I, ErrMsg); + } else { + Ctx.emitError(ErrMsg); + } + } + Parts[0] = Val; return; } @@ -398,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, DAG.getIntPtrConstant(RoundBits)); - getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); if (TLI.isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. @@ -444,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. 
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT) { + EVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -530,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -538,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); } } - - - namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -622,14 +641,15 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, + const Value *V = 0) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. 
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -648,7 +668,8 @@ namespace { SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -722,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); + NumRegs, RegisterVT, ValueVT, V); Part += NumRegs; Parts.clear(); } @@ -737,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { + SDValue &Chain, SDValue *Flag, + const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Get the list of the values's legal parts. 
@@ -749,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); + &Parts[Part], NumParts, RegisterVT, V); Part += NumParts; } @@ -994,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1149,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); @@ -1218,7 +1240,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -2093,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(4)) + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2565,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { if (handleSmallSwitchRange(CR, WorkList, SV, 
Default, SwitchMBB)) continue; - // If the switch has more than 5 blocks, and at least 40% dense, and the + // If the switch has more than N blocks, and is at least 40% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. + // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries(). if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) continue; @@ -4377,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttr(Attribute::OptimizeForSize) || + if (!F->getFnAttributes().hasOptimizeForSizeAttr() || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -6244,7 +6267,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), DAG, AsmNodeOperands); @@ -6326,7 +6349,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, DAG, AsmNodeOperands); @@ -6357,7 +6380,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -6397,7 +6420,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), - Chain, &Flag); + Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6515,7 +6538,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, ExtendKind); + PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -6596,7 +6619,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, + NumRegs, RegisterVT, VT, NULL, AssertOp)); CurReg += NumRegs; } @@ -6635,7 +6658,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6777,7 +6800,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, AssertOp); + RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -6818,7 +6841,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { 
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - AssertOp)); + NULL, AssertOp)); } i += NumParts; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 56f3a45c9a..be3ecf34f7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -613,6 +613,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; SupportJumpTables = true; + MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index a04ac3fbc1..a58c144659 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -137,10 +137,10 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { /// add a guard variable to functions that call alloca, and functions with /// buffers larger than SSPBufferSize bytes. bool StackProtector::RequiresStackProtector() const { - if (F->hasFnAttr(Attribute::StackProtectReq)) + if (F->getFnAttributes().hasStackProtectReqAttr()) return true; - if (!F->hasFnAttr(Attribute::StackProtect)) + if (!F->getFnAttributes().hasStackProtectAttr()) return false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index a813fa65ac..230ea038e2 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -552,7 +552,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. 
unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt index 7d67d0d8be..348308897d 100644 --- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -1,11 +1,6 @@ - -include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -set(system_libs - ${system_libs} - jitprofiling - ) +include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMIntelJITEvents IntelJITEventListener.cpp + jitprofiling.c ) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index c11c17eac7..23f8607322 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" #include "llvm/Support/ValueHandle.h" #include "EventListenerCommon.h" +#include "IntelJITEventsWrapper.h" using namespace llvm; using namespace llvm::jitprofiling; @@ -37,13 +37,13 @@ namespace { class IntelJITEventListener : public JITEventListener { typedef DenseMap<void*, unsigned int> MethodIDMap; - IntelJITEventsWrapper& Wrapper; + OwningPtr<IntelJITEventsWrapper> Wrapper; MethodIDMap MethodIDs; FilenameCache Filenames; public: - IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper) - : Wrapper(libraryWrapper) { + IntelJITEventListener(IntelJITEventsWrapper* 
libraryWrapper) { + Wrapper.reset(libraryWrapper); } ~IntelJITEventListener() { @@ -94,7 +94,7 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat( void IntelJITEventListener::NotifyFunctionEmitted( const Function &F, void *FnStart, size_t FnSize, const EmittedFunctionDetails &Details) { - iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper, + iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, F.getName().data(), reinterpret_cast<uint64_t>(FnStart), FnSize); @@ -151,15 +151,15 @@ void IntelJITEventListener::NotifyFunctionEmitted( FunctionMessage.line_number_table = 0; } - Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); MethodIDs[FnStart] = FunctionMessage.method_id; } void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { MethodIDMap::iterator I = MethodIDs.find(FnStart); if (I != MethodIDs.end()) { - Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); MethodIDs.erase(I); } } @@ -168,15 +168,13 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { namespace llvm { JITEventListener *JITEventListener::createIntelJITEventListener() { - static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper( - new IntelJITEventsWrapper); - return new IntelJITEventListener(*JITProfilingWrapper); + return new IntelJITEventListener(new IntelJITEventsWrapper); } // for testing JITEventListener *JITEventListener::createIntelJITEventListener( IntelJITEventsWrapper* TestImpl) { - return new IntelJITEventListener(*TestImpl); + return new IntelJITEventListener(TestImpl); } } // namespace llvm diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h new file mode 100644 index 0000000000..7ab08e15a8 --- /dev/null +++ 
b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h @@ -0,0 +1,102 @@ +//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a wrapper for the Intel JIT Events API. It allows for the +// implementation of the jitprofiling library to be swapped with an alternative +// implementation (for testing). To include this file, you must have the +// jitprofiling.h header available; it is available in Intel(R) VTune(TM) +// Amplifier XE 2011. +// +//===----------------------------------------------------------------------===// + +#ifndef INTEL_JIT_EVENTS_WRAPPER_H +#define INTEL_JIT_EVENTS_WRAPPER_H + +#include "jitprofiling.h" + +namespace llvm { + +class IntelJITEventsWrapper { + // Function pointer types for testing implementation of Intel jitprofiling + // library + typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*); + typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx ); + typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void); + typedef void (*FinalizeThreadPtr)(void); + typedef void (*FinalizeProcessPtr)(void); + typedef unsigned int (*GetNewMethodIDPtr)(void); + + NotifyEventPtr NotifyEventFunc; + RegisterCallbackExPtr RegisterCallbackExFunc; + IsProfilingActivePtr IsProfilingActiveFunc; + FinalizeThreadPtr FinalizeThreadFunc; + FinalizeProcessPtr FinalizeProcessFunc; + GetNewMethodIDPtr GetNewMethodIDFunc; + +public: + bool isAmplifierRunning() { + return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON; + } + + IntelJITEventsWrapper() + : NotifyEventFunc(::iJIT_NotifyEvent), + RegisterCallbackExFunc(::iJIT_RegisterCallbackEx), + IsProfilingActiveFunc(::iJIT_IsProfilingActive), + FinalizeThreadFunc(::FinalizeThread), + 
FinalizeProcessFunc(::FinalizeProcess), + GetNewMethodIDFunc(::iJIT_GetNewMethodID) { + } + + IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl, + RegisterCallbackExPtr RegisterCallbackExImpl, + IsProfilingActivePtr IsProfilingActiveImpl, + FinalizeThreadPtr FinalizeThreadImpl, + FinalizeProcessPtr FinalizeProcessImpl, + GetNewMethodIDPtr GetNewMethodIDImpl) + : NotifyEventFunc(NotifyEventImpl), + RegisterCallbackExFunc(RegisterCallbackExImpl), + IsProfilingActiveFunc(IsProfilingActiveImpl), + FinalizeThreadFunc(FinalizeThreadImpl), + FinalizeProcessFunc(FinalizeProcessImpl), + GetNewMethodIDFunc(GetNewMethodIDImpl) { + } + + // Sends an event announcing that a function has been emitted + // return values are event-specific. See Intel documentation for details. + int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { + if (!NotifyEventFunc) + return -1; + return NotifyEventFunc(EventType, EventSpecificData); + } + + // Registers a callback function to receive notice of profiling state changes + void iJIT_RegisterCallbackEx(void *UserData, + iJIT_ModeChangedEx NewModeCallBackFuncEx) { + if (RegisterCallbackExFunc) + RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx); + } + + // Returns the current profiler mode + iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) { + if (!IsProfilingActiveFunc) + return iJIT_NOTHING_RUNNING; + return IsProfilingActiveFunc(); + } + + // Generates a locally unique method ID for use in code registration + unsigned int iJIT_GetNewMethodID(void) { + if (!GetNewMethodIDFunc) + return -1; + return GetNewMethodIDFunc(); + } +}; + +} //namespace llvm + +#endif //INTEL_JIT_EVENTS_WRAPPER_H diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile index ba75ac6f64..dcf3126cc5 100644 --- a/lib/ExecutionEngine/IntelJITEvents/Makefile +++ b/lib/ExecutionEngine/IntelJITEvents/Makefile @@ -11,7 +11,8 @@ LIBRARYNAME = LLVMIntelJITEvents include $(LEVEL)/Makefile.config 
-SOURCES := IntelJITEventListener.cpp -CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +SOURCES := IntelJITEventListener.cpp \ + jitprofiling.c +CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h new file mode 100644 index 0000000000..238065fe0a --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_config.h @@ -0,0 +1,449 @@ +/*===-- ittnotify_config.h - JIT Profiling API internal config-----*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API internal config. + * + *===----------------------------------------------------------------------===*/ +#ifndef _ITTNOTIFY_CONFIG_H_ +#define _ITTNOTIFY_CONFIG_H_ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif /* _WIN32 */ +#endif /* ITT_PLATFORM */ + +#if 
defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define CDECL /* not actual on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define CDECL __attribute__ ((cdecl)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define STDCALL /* not supported on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define STDCALL __attribute__ ((stdcall)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI CDECL +#define LIBITTAPI CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL CDECL +#define LIBITTAPI_CALL CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. 
+ */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#endif /* __STRICT_ANSI__ */ +#define ITT_INLINE_ATTRIBUTE __attribute__ ((always_inline)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifndef ITT_ARCH_IA32 +# define ITT_ARCH_IA32 1 +#endif /* ITT_ARCH_IA32 */ + +#ifndef ITT_ARCH_IA32E +# define ITT_ARCH_IA32E 2 +#endif /* ITT_ARCH_IA32E */ + +#ifndef ITT_ARCH_IA64 +# define ITT_ARCH_IA64 3 +#endif /* ITT_ARCH_IA64 */ + +#ifndef ITT_ARCH +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define ITT_ARCH ITT_ARCH_IA32E +# elif defined _M_IA64 || defined __ia64 +# define ITT_ARCH ITT_ARCH_IA64 +# else +# define ITT_ARCH ITT_ARCH_IA32 +# endif +#endif + +#ifdef __cplusplus +# define ITT_EXTERN_C extern "C" +#else +# define ITT_EXTERN_C /* nothing */ +#endif /* __cplusplus */ + +#define ITT_TO_STR_AUX(x) #x +#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) + +#define __ITT_BUILD_ASSERT(expr, suffix) do { \ + static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ + __itt_build_check_##suffix[0] = 0; \ +} while(0) +#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) +#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) + +#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } + +/* Replace with snapshot date YYYYMMDD for promotion build. 
*/ +#define API_VERSION_BUILD 20111111 + +#ifndef API_VERSION_NUM +#define API_VERSION_NUM 0.0.0 +#endif /* API_VERSION_NUM */ + +#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ + " (" ITT_TO_STR(API_VERSION_BUILD) ")" + +/* OS communication functions */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +typedef HMODULE lib_t; +typedef DWORD TIDT; +typedef CRITICAL_SECTION mutex_t; +#define MUTEX_INITIALIZER { 0 } +#define strong_alias(name, aliasname) /* empty for Windows */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <dlfcn.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ +#endif /* _GNU_SOURCE */ +#include <pthread.h> +typedef void* lib_t; +typedef pthread_t TIDT; +typedef pthread_mutex_t mutex_t; +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +#define strong_alias(name, aliasname) _strong_alias(name, aliasname) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_get_proc(lib, name) GetProcAddress(lib, name) +#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) +#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) +#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) +#define __itt_load_lib(name) LoadLibraryA(name) +#define __itt_unload_lib(handle) FreeLibrary(handle) +#define __itt_system_error() (int)GetLastError() +#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) +#define __itt_fstrlen(s) lstrlenA(s) +#define __itt_fstrcpyn(s1, s2, l) lstrcpynA(s1, s2, l) +#define __itt_fstrdup(s) _strdup(s) +#define __itt_thread_id() GetCurrentThreadId() +#define __itt_thread_yield() SwitchToThread() +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long 
__itt_interlocked_increment(volatile long* ptr) +{ + return InterlockedIncrement(ptr); +} +#endif /* ITT_SIMPLE_INIT */ +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#define __itt_get_proc(lib, name) dlsym(lib, name) +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} +#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) +#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) +#define __itt_unload_lib(handle) dlclose(handle) +#define __itt_system_error() errno +#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) +#define __itt_fstrlen(s) strlen(s) +#define __itt_fstrcpyn(s1, s2, l) strncpy(s1, s2, l) +#define __itt_fstrdup(s) strdup(s) +#define __itt_thread_id() pthread_self() +#define __itt_thread_yield() sched_yield() +#if ITT_ARCH==ITT_ARCH_IA64 +#ifdef __INTEL_COMPILER +#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) +#else /* __INTEL_COMPILER */ +/* TODO: Add Support for not Intel compilers for IA64 */ +#endif /* __INTEL_COMPILER */ +#else /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) +{ + long result; + __asm__ 
__volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(long*)ptr) + : "0"(addend), "m"(*(long*)ptr) + : "memory"); + return result; +} +#endif /* ITT_ARCH==ITT_ARCH_IA64 */ +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return __TBB_machine_fetchadd4(ptr, 1) + 1L; +} +#endif /* ITT_SIMPLE_INIT */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +typedef enum { + __itt_collection_normal = 0, + __itt_collection_paused = 1 +} __itt_collection_state; + +typedef enum { + __itt_thread_normal = 0, + __itt_thread_ignored = 1 +} __itt_thread_state; + +#pragma pack(push, 8) + +typedef struct ___itt_thread_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + TIDT tid; + __itt_thread_state state; /*!< Thread state (paused or normal) */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_thread_info* next; +} __itt_thread_info; + +#include "ittnotify_types.h" /* For __itt_group_id definition */ + +typedef struct ___itt_api_info_20101001 +{ + const char* name; + void** func_ptr; + void* init_func; + __itt_group_id group; +} __itt_api_info_20101001; + +typedef struct ___itt_api_info +{ + const char* name; + void** func_ptr; + void* init_func; + void* null_func; + __itt_group_id group; +} __itt_api_info; + +struct ___itt_domain; +struct ___itt_string_handle; + +typedef struct ___itt_global +{ + unsigned char magic[8]; + unsigned long version_major; + unsigned long version_minor; + unsigned long version_build; + volatile long api_initialized; + volatile long mutex_initialized; + volatile long atomic_counter; + mutex_t mutex; + lib_t lib; + void* error_handler; + const char** 
dll_path_ptr; + __itt_api_info* api_list_ptr; + struct ___itt_global* next; + /* Joinable structures below */ + __itt_thread_info* thread_list; + struct ___itt_domain* domain_list; + struct ___itt_string_handle* string_list; + __itt_collection_state state; +} __itt_global; + +#pragma pack(pop) + +#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = NULL; \ + h->nameW = n ? _wcsdup(n) : NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = n ? __itt_fstrdup(n) : NULL; \ + h->nameW = NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 0; /* domain is disabled by default */ \ + h->nameA = name ? 
__itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = NULL; \ + h->strW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = name ? __itt_fstrdup(name) : NULL; \ + h->strW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h new file mode 100644 index 0000000000..5d502ba8e8 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/ittnotify_types.h @@ -0,0 +1,63 @@ +//===-- ittnotify_types.h - Intel(R) Performance Analyzer JIT (Just-In-Time) Profiling API internal types. ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef _ITTNOTIFY_TYPES_H_ +#define _ITTNOTIFY_TYPES_H_ + +typedef enum ___itt_group_id +{ + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + __itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_all = -1 +} __itt_group_id; + +#pragma pack(push, 8) + +typedef struct ___itt_group_list +{ + __itt_group_id id; + const char* name; +} __itt_group_list; + +#pragma pack(pop) + +#define ITT_GROUP_LIST(varname) \ + static __itt_group_list varname[] = { \ + { __itt_group_all, "all" }, \ + { __itt_group_control, "control" }, \ + { __itt_group_thread, "thread" }, \ + { __itt_group_mark, "mark" }, \ + { __itt_group_sync, "sync" }, \ + { __itt_group_fsync, "fsync" }, \ + { __itt_group_jit, "jit" }, \ + { __itt_group_model, "model" }, \ + { __itt_group_counter, "counter" }, \ + { __itt_group_frame, "frame" }, \ + { __itt_group_stitch, "stitch" }, \ + { __itt_group_heap, "heap" }, \ + { __itt_group_structure, "structure" }, \ + { __itt_group_suppress, "suppress" }, \ + { __itt_group_none, NULL } \ + } + +#endif /* _ITTNOTIFY_TYPES_H_ */ diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c new file mode 100644 index 0000000000..9b0dafbdca --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -0,0 +1,476 @@ +/*===-- jitprofiling.c - JIT (Just-In-Time) Profiling API----------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. 
See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API implementation. + * + *===----------------------------------------------------------------------===*/ +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +#pragma optimize("", off) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <pthread.h> +#include <dlfcn.h> +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <malloc.h> +#include <stdlib.h> + +#include "jitprofiling.h" + +static const char rcsid[] = "\n@(#) $Revision: 243501 $\n"; + +#define DLL_ENVIRONMENT_VAR "VS_PROFILER" + +#ifndef NEW_DLL_ENVIRONMENT_VAR +#if ITT_ARCH==ITT_ARCH_IA32 +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32" +#else +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64" +#endif +#endif /* NEW_DLL_ENVIRONMENT_VAR */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define DEFAULT_DLLNAME "JitPI.dll" +HINSTANCE m_libHandle = NULL; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define DEFAULT_DLLNAME "libJitPI.so" +void* m_libHandle = NULL; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/* default location of JIT profiling agent on Android */ +#define ANDROID_JIT_AGENT_PATH "/data/intel/libittnotify.so" + +/* the function pointers */ +typedef unsigned int(*TPInitialize)(void); +static TPInitialize FUNC_Initialize=NULL; + +typedef unsigned int(*TPNotify)(unsigned int, void*); +static TPNotify FUNC_NotifyEvent=NULL; + +static iJIT_IsProfilingActiveFlags executionMode = iJIT_NOTHING_RUNNING; + +/* end collector dll part. 
*/ + +/* loadiJIT_Funcs() : this function is called just in the beginning + * and is responsible to load the functions from BistroJavaCollector.dll + * result: + * on success: the functions loads, iJIT_DLL_is_missing=0, return value = 1 + * on failure: the functions are NULL, iJIT_DLL_is_missing=1, return value = 0 + */ +static int loadiJIT_Funcs(void); + +/* global representing whether the BistroJavaCollector can't be loaded */ +static int iJIT_DLL_is_missing = 0; + +/* Virtual stack - the struct is used as a virtual stack for each thread. + * Every thread initializes with a stack of size INIT_TOP_STACK. + * Every method entry decreases from the current stack point, + * and when a thread stack reaches its top of stack (return from the global + * function), the top of stack and the current stack increase. Notice that + * when returning from a function the stack pointer is the address of + * the function return. +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static DWORD threadLocalStorageHandle = 0; +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static pthread_key_t threadLocalStorageHandle = (pthread_key_t)0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#define INIT_TOP_Stack 10000 + +typedef struct +{ + unsigned int TopStack; + unsigned int CurrentStack; +} ThreadStack, *pThreadStack; + +/* end of virtual stack. */ + +/* + * The function for reporting virtual-machine related events to VTune. + * Note: when reporting iJVM_EVENT_TYPE_ENTER_NIDS, there is no need to fill + * in the stack_id field in the iJIT_Method_NIDS structure, as VTune fills it. + * The return value in iJVM_EVENT_TYPE_ENTER_NIDS && + * iJVM_EVENT_TYPE_LEAVE_NIDS events will be 0 in case of failure. + * in iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED event + * it will be -1 if EventSpecificData == 0 otherwise it will be 0. +*/ + +ITT_EXTERN_C int JITAPI +iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData) +{ + int ReturnValue; + + /* + * This section is for debugging outside of VTune. 
+ * It creates the environment variables that indicates call graph mode. + * If running outside of VTune remove the remark. + * + * + * static int firstTime = 1; + * char DoCallGraph[12] = "DoCallGraph"; + * if (firstTime) + * { + * firstTime = 0; + * SetEnvironmentVariable( "BISTRO_COLLECTORS_DO_CALLGRAPH", DoCallGraph); + * } + * + * end of section. + */ + + /* initialization part - the functions have not been loaded yet. This part + * will load the functions, and check if we are in Call Graph mode. + * (for special treatment). + */ + if (!FUNC_NotifyEvent) + { + if (iJIT_DLL_is_missing) + return 0; + + /* load the Function from the DLL */ + if (!loadiJIT_Funcs()) + return 0; + + /* Call Graph initialization. */ + } + + /* If the event is method entry/exit, check that in the current mode + * VTune is allowed to receive it + */ + if ((event_type == iJVM_EVENT_TYPE_ENTER_NIDS || + event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) && + (executionMode != iJIT_CALLGRAPH_ON)) + { + return 0; + } + /* This section is performed when method enter event occurs. + * It updates the virtual stack, or creates it if this is the first + * method entry in the thread. The stack pointer is decreased. + */ + if (event_type == iJVM_EVENT_TYPE_ENTER_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* initialize the stack. 
*/ + threadStack = (pThreadStack) calloc (sizeof(ThreadStack), 1); + threadStack->TopStack = INIT_TOP_Stack; + threadStack->CurrentStack = INIT_TOP_Stack; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue(threadLocalStorageHandle,(void*)threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle,(void*)threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* decrease the stack. */ + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (threadStack->CurrentStack)--; + } + + /* This section is performed when method leave event occurs + * It updates the virtual stack. + * Increases the stack pointer. + * If the stack pointer reached the top (left the global function) + * increase the pointer and the top pointer. + */ + if (event_type == iJVM_EVENT_TYPE_LEAVE_NIDS) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_NIDS) EventSpecificData)->method_id <= 999 ) + return 0; + + if (!threadStack) + { + /* Error: first report in this thread is method exit */ + exit (1); + } + + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + ++(threadStack->CurrentStack) + 1; + + if (((piJIT_Method_NIDS) EventSpecificData)->stack_id + > threadStack->TopStack) + ((piJIT_Method_NIDS) EventSpecificData)->stack_id = + (unsigned int)-1; + } + + if (event_type == iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED) + { + /* check for use of reserved method IDs */ + if ( ((piJIT_Method_Load) EventSpecificData)->method_id <= 999 ) + return 0; + } + + ReturnValue = (int)FUNC_NotifyEvent(event_type, EventSpecificData); + + return ReturnValue; +} + +/* The new mode call back routine */ +ITT_EXTERN_C void JITAPI 
+iJIT_RegisterCallbackEx(void *userdata, iJIT_ModeChangedEx + NewModeCallBackFuncEx) +{ + /* is it already missing... or the load of functions from the DLL failed */ + if (iJIT_DLL_is_missing || !loadiJIT_Funcs()) + { + /* then do not bother with notifications */ + NewModeCallBackFuncEx(userdata, iJIT_NO_NOTIFICATIONS); + /* Error: could not load JIT functions. */ + return; + } + /* nothing to do with the callback */ +} + +/* + * This function allows the user to query in which mode, if at all, + *VTune is running + */ +ITT_EXTERN_C iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive() +{ + if (!iJIT_DLL_is_missing) + { + loadiJIT_Funcs(); + } + + return executionMode; +} + +/* this function loads the collector dll (BistroJavaCollector) + * and the relevant functions. + * on success: all functions load, iJIT_DLL_is_missing = 0, return value = 1 + * on failure: all functions are NULL, iJIT_DLL_is_missing = 1, return value = 0 + */ +static int loadiJIT_Funcs() +{ + static int bDllWasLoaded = 0; + char *dllName = (char*)rcsid; /* !! 
Just to avoid unused code elimination */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + DWORD dNameLength = 0; +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if(bDllWasLoaded) + { + /* dll was already loaded, no need to do it for the second time */ + return 1; + } + + /* Assumes that the DLL will not be found */ + iJIT_DLL_is_missing = 1; + FUNC_NotifyEvent = NULL; + + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + /* Try to get the dll name from the environment */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN + dNameLength = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(NEW_DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... */ + m_libHandle = LoadLibraryExA(dllName, + NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + } + free(dllName); + } else { + /* Try to use old VS_PROFILER variable */ + dNameLength = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, NULL, 0); + if (dNameLength) + { + DWORD envret = 0; + dllName = (char*)malloc(sizeof(char) * (dNameLength + 1)); + envret = GetEnvironmentVariableA(DLL_ENVIRONMENT_VAR, + dllName, dNameLength); + if (envret) + { + /* Try to load the dll from the PATH... */ + m_libHandle = LoadLibraryA(dllName); + } + free(dllName); + } + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dllName = getenv(NEW_DLL_ENVIRONMENT_VAR); + if (!dllName) + dllName = getenv(DLL_ENVIRONMENT_VAR); +#ifdef ANDROID + if (!dllName) + dllName = ANDROID_JIT_AGENT_PATH; +#endif + if (dllName) + { + /* Try to load the dll from the PATH... 
*/ + m_libHandle = dlopen(dllName, RTLD_LAZY); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + if (!m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + m_libHandle = LoadLibraryA(DEFAULT_DLLNAME); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + /* if the dll wasn't loaded - exit. */ + if (!m_libHandle) + { + iJIT_DLL_is_missing = 1; /* don't try to initialize + * JIT agent the second time + */ + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_NotifyEvent = (TPNotify)GetProcAddress(m_libHandle, "NotifyEvent"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_NotifyEvent = (TPNotify)dlsym(m_libHandle, "NotifyEvent"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_NotifyEvent) + { + FUNC_Initialize = NULL; + return 0; + } + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FUNC_Initialize = (TPInitialize)GetProcAddress(m_libHandle, "Initialize"); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + FUNC_Initialize = (TPInitialize)dlsym(m_libHandle, "Initialize"); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (!FUNC_Initialize) + { + FUNC_NotifyEvent = NULL; + return 0; + } + + executionMode = (iJIT_IsProfilingActiveFlags)FUNC_Initialize(); + + bDllWasLoaded = 1; + iJIT_DLL_is_missing = 0; /* DLL is ok. */ + + /* + * Call Graph mode: init the thread local storage + * (need to store the virtual stack there). 
+ */ + if ( executionMode == iJIT_CALLGRAPH_ON ) + { + /* Allocate a thread local storage slot for the thread "stack" */ + if (!threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + threadLocalStorageHandle = TlsAlloc(); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_create(&threadLocalStorageHandle, NULL); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + + return 1; +} + +/* + * This function should be called by the user whenever a thread ends, + * to free the thread "virtual stack" storage + */ +ITT_EXTERN_C void JITAPI FinalizeThread() +{ + if (threadLocalStorageHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + pThreadStack threadStack = + (pThreadStack)TlsGetValue (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pThreadStack threadStack = + (pThreadStack)pthread_getspecific(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (threadStack) + { + free (threadStack); + threadStack = NULL; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsSetValue (threadLocalStorageHandle, threadStack); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_setspecific(threadLocalStorageHandle, threadStack); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + } +} + +/* + * This function should be called by the user when the process ends, + * to free the local storage index +*/ +ITT_EXTERN_C void JITAPI FinalizeProcess() +{ + if (m_libHandle) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + FreeLibrary(m_libHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlclose(m_libHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + m_libHandle = NULL; + } + + if (threadLocalStorageHandle) +#if ITT_PLATFORM==ITT_PLATFORM_WIN + TlsFree (threadLocalStorageHandle); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + pthread_key_delete(threadLocalStorageHandle); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +} + +/* + * This function should be called by the user for any method once. 
+ * The function will return a unique method ID, the user should maintain + * the ID for each method + */ +ITT_EXTERN_C unsigned int JITAPI iJIT_GetNewMethodID() +{ + static unsigned int methodID = 0x100000; + + if (methodID == 0) + return 0; /* ERROR : this is not a valid value */ + + return methodID++; +} diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h new file mode 100644 index 0000000000..f33fb83ba9 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.h @@ -0,0 +1,254 @@ +/*===-- jitprofiling.h - JIT Profiling API-------------------------*- C -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file provides Intel(R) Performance Analyzer JIT (Just-In-Time) + * Profiling API declaration. + * + *===----------------------------------------------------------------------===*/ +#ifndef __JITPROFILING_H__ +#define __JITPROFILING_H__ + +/* + * Various constants used by functions + */ + +/* event notification */ +typedef enum iJIT_jvm_event +{ + + /* shutdown */ + + /* + * Program exiting EventSpecificData NA + */ + iJVM_EVENT_TYPE_SHUTDOWN = 2, + + /* JIT profiling */ + + /* + * issued after method code jitted into memory but before code is executed + * EventSpecificData is an iJIT_Method_Load + */ + iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED=13, + + /* issued before unload. Method code will no longer be executed, but code + * and info are still in memory. 
The VTune profiler may capture method + * code only at this point EventSpecificData is iJIT_Method_Id + */ + iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + + /* Method Profiling */ + + /* method name, Id and stack is supplied + * issued when a method is about to be entered EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_ENTER_NIDS = 19, + + /* method name, Id and stack is supplied + * issued when a method is about to be left EventSpecificData is + * iJIT_Method_NIDS + */ + iJVM_EVENT_TYPE_LEAVE_NIDS +} iJIT_JVM_EVENT; + +typedef enum _iJIT_ModeFlags +{ + /* No need to Notify VTune, since VTune is not running */ + iJIT_NO_NOTIFICATIONS = 0x0000, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + * ) + * for all the method already jitted + */ + iJIT_BE_NOTIFY_ON_LOAD = 0x0001, + + /* when turned on the jit must call + * iJIT_NotifyEvent + * ( + * iJVM_EVENT_TYPE_METHOD_UNLOAD_FINISHED, + * ) for all the method that are unloaded + */ + iJIT_BE_NOTIFY_ON_UNLOAD = 0x0002, + + /* when turned on the jit must instrument all + * the currently jited code with calls on + * method entries + */ + iJIT_BE_NOTIFY_ON_METHOD_ENTRY = 0x0004, + + /* when turned on the jit must instrument all + * the currently jited code with calls + * on method exit + */ + iJIT_BE_NOTIFY_ON_METHOD_EXIT = 0x0008 + +} iJIT_ModeFlags; + + + /* Flags used by iJIT_IsProfilingActive() */ +typedef enum _iJIT_IsProfilingActiveFlags +{ + /* No profiler is running. Currently not used */ + iJIT_NOTHING_RUNNING = 0x0000, + + /* Sampling is running. 
This is the default value + * returned by iJIT_IsProfilingActive() + */ + iJIT_SAMPLING_ON = 0x0001, + + /* Call Graph is running */ + iJIT_CALLGRAPH_ON = 0x0002 + +} iJIT_IsProfilingActiveFlags; + +/* Enumerator for the environment of methods*/ +typedef enum _iJDEnvironmentType +{ + iJDE_JittingAPI = 2 +} iJDEnvironmentType; + +/********************************** + * Data structures for the events * + **********************************/ + +/* structure for the events: + * iJVM_EVENT_TYPE_METHOD_UNLOAD_START + */ + +typedef struct _iJIT_Method_Id +{ + /* Id of the method (same as the one passed in + * the iJIT_Method_Load struct + */ + unsigned int method_id; + +} *piJIT_Method_Id, iJIT_Method_Id; + + +/* structure for the events: + * iJVM_EVENT_TYPE_ENTER_NIDS, + * iJVM_EVENT_TYPE_LEAVE_NIDS, + * iJVM_EVENT_TYPE_EXCEPTION_OCCURRED_NIDS + */ + +typedef struct _iJIT_Method_NIDS +{ + /* unique method ID */ + unsigned int method_id; + + /* NOTE: no need to fill this field, it's filled by VTune */ + unsigned int stack_id; + + /* method name (just the method, without the class) */ + char* method_name; +} *piJIT_Method_NIDS, iJIT_Method_NIDS; + +/* structures for the events: + * iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED + */ + +typedef struct _LineNumberInfo +{ + /* x86 Offset from the begining of the method*/ + unsigned int Offset; + + /* source line number from the begining of the source file */ + unsigned int LineNumber; + +} *pLineNumberInfo, LineNumberInfo; + +typedef struct _iJIT_Method_Load +{ + /* unique method ID - can be any unique value, (except 0 - 999) */ + unsigned int method_id; + + /* method name (can be with or without the class and signature, in any case + * the class name will be added to it) + */ + char* method_name; + + /* virtual address of that method - This determines the method range for the + * iJVM_EVENT_TYPE_ENTER/LEAVE_METHOD_ADDR events + */ + void* method_load_address; + + /* Size in memory - Must be exact */ + unsigned int method_size; + + /* 
Line Table size in number of entries - Zero if none */ + unsigned int line_number_size; + + /* Pointer to the begining of the line numbers info array */ + pLineNumberInfo line_number_table; + + /* unique class ID */ + unsigned int class_id; + + /* class file name */ + char* class_file_name; + + /* source file name */ + char* source_file_name; + + /* bits supplied by the user for saving in the JIT file */ + void* user_data; + + /* the size of the user data buffer */ + unsigned int user_data_size; + + /* NOTE: no need to fill this field, it's filled by VTune */ + iJDEnvironmentType env; + +} *piJIT_Method_Load, iJIT_Method_Load; + +/* API Functions */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef CDECL +# if defined WIN32 || defined _WIN32 +# define CDECL __cdecl +# else /* defined WIN32 || defined _WIN32 */ +# if defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define CDECL /* not actual on x86_64 platform */ +# else /* _M_X64 || _M_AMD64 || __x86_64__ */ +# define CDECL __attribute__ ((cdecl)) +# endif /* _M_X64 || _M_AMD64 || __x86_64__ */ +# endif /* defined WIN32 || defined _WIN32 */ +#endif /* CDECL */ + +#define JITAPI CDECL + +/* called when the settings are changed with new settings */ +typedef void (*iJIT_ModeChangedEx)(void *UserData, iJIT_ModeFlags Flags); + +int JITAPI iJIT_NotifyEvent(iJIT_JVM_EVENT event_type, void *EventSpecificData); + +/* The new mode call back routine */ +void JITAPI iJIT_RegisterCallbackEx(void *userdata, + iJIT_ModeChangedEx NewModeCallBackFuncEx); + +iJIT_IsProfilingActiveFlags JITAPI iJIT_IsProfilingActive(void); + +void JITAPI FinalizeThread(void); + +void JITAPI FinalizeProcess(void); + +unsigned int JITAPI iJIT_GetNewMethodID(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __JITPROFILING_H__ */ diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8fed48cef2..ffa79761f2 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -267,7 +267,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { 
/* *** */ -void MCTargetExpr::Anchor() {} +void MCTargetExpr::anchor() {} /* *** */ diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f143e6d0ad..d07a3c9e7f 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1775,7 +1775,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (exponent+1 >= (int)semanticsPrecision(*semantics)) + if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index dd218f6099..00be43b750 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Errno.h" #include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Support/raw_ostream.h" #if HAVE_STRING_H #include <string.h> @@ -39,7 +40,7 @@ std::string StrError(int errnum) { const int MaxErrStrLen = 2000; char buffer[MaxErrStrLen]; buffer[0] = '\0'; - char* str = buffer; + std::string str; #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. if (errnum) @@ -49,6 +50,7 @@ std::string StrError(int errnum) { str = strerror_r(errnum,buffer,MaxErrStrLen-1); # else strerror_r(errnum,buffer,MaxErrStrLen-1); + str = buffer; # endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" if (errnum) @@ -58,12 +60,13 @@ std::string StrError(int errnum) { // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. 
if (errnum) - strncpy(buffer,strerror(errnum),MaxErrStrLen-1); - buffer[MaxErrStrLen-1] = '\0'; + str = strerror(errnum); #else // Strange that this system doesn't even have strerror // but, oh well, just use a generic message - sprintf(buffer, "Error #%d", errnum); + raw_string_ostream stream(str); + stream << "Error #" << errnum; + stream.flush(); #endif return str; } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index a13b9e2f87..9ee3f2db92 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -234,6 +234,8 @@ std::string sys::getHostCPUName() { case 37: // Intel Core i7, laptop version. case 44: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. + case 46: // Nehalem EX + case 47: // Westmere EX return "corei7"; // SandyBridge: diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index f70e60d3f5..b82371a7b6 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -267,7 +267,8 @@ Path::GetCurrentDirectory() { } #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ - defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \ + defined(__linux__) || defined(__CYGWIN__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -345,9 +346,17 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { return Path(exe_path); #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; - ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path)); - if (len >= 0) - return Path(StringRef(exe_path, len)); + StringRef aPath("/proc/self/exe"); + if (sys::fs::exists(aPath)) { + // /proc is not always mounted under Linux (chroot for example). 
+ ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path)); + if (len >= 0) + return Path(StringRef(exe_path, len)); + } else { + // Fall back to the classical detection. + if (getprogpath(exe_path, argv0) != NULL) + return Path(exe_path); + } #elif defined(HAVE_DLFCN_H) // Use dladdr to get executable path if available. Dl_info DLInfo; diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index e05e81acaf..6d874ea0d0 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -249,7 +249,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { // On glibc systems we have the 'backtrace' function, which works nicely, but // doesn't demangle symbols. static void PrintStackTrace(void *) { -#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACE) +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) static void* StackTrace[256]; // Use backtrace() to output a backtrace on Linux systems with glibc. int depth = backtrace(StackTrace, diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 696768ba9d..3dfac66b77 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -794,7 +794,7 @@ mapped_file_region::mapped_file_region(const Twine &path, SmallVector<wchar_t, 128> path_utf16; // Convert path to UTF-16. - if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) + if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))) return; // Get file handle for creating a file mapping. 
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 7c353c89bb..34df636a72 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -903,6 +903,7 @@ bool Scanner::consume(uint32_t Expected) { void Scanner::skip(uint32_t Distance) { Current += Distance; Column += Distance; + assert(Current <= End && "Skipped past the end"); } bool Scanner::isBlankOrBreak(StringRef::iterator Position) { @@ -1239,6 +1240,12 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { } } } + + if (Current == End) { + setError("Expected quote at end of scalar", Current); + return false; + } + skip(1); // Skip ending quote. Token T; T.Kind = Token::TK_Scalar; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 9a8cab8ecc..0ac92f1ee8 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -40,6 +40,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 38509a3400..00bf1b85ec 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,9 +32,6 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true", def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true", "Enable VFP3 instructions", [FeatureVFP2]>; -def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", - "Enable VFP4 instructions", - [FeatureVFP3]>; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; @@ -44,10 +41,16 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision 
floating point">; +def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", + "Enable VFP4 instructions", + [FeatureVFP3, FeatureFP16]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; +def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", + "HasHardwareDivideInARM", "true", + "Enable divide instructions in ARM mode">; def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", @@ -139,6 +142,13 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", [FeatureVMLxForwarding, FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", + "Swift ARM processors", + [FeatureNEONForFP, FeatureT2XtPk, + FeatureVFP4, FeatureMP, FeatureHWDiv, + FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureHasSlowFPVMLx]>; + // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", @@ -240,6 +250,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; +// Swift uArch Processors. 
+def : ProcessorModel<"swift", SwiftModel, + [ProcSwift, HasV7Ops, FeatureNEON, + FeatureDB, FeatureDSPThumb2, + FeatureHasRAS]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c08294918e..42b6bc3cdc 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -49,6 +49,11 @@ static cl::opt<bool> WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), cl::desc("Widen ARM vmovs to vmovd when possible")); +static cl::opt<unsigned> +SwiftPartialUpdateClearance("swift-partial-update-clearance", + cl::Hidden, cl::init(12), + cl::desc("Clearance before partial register updates")); + /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { uint16_t MLxOpc; // MLA / MLS opcode @@ -1389,7 +1394,6 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: - case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: case ARM::t2LDRSHi12: @@ -1528,6 +1532,14 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; } +bool +ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + // Reduce false anti-dependencies to let Swift's out-of-order execution + // engine do its thing. + return Subtarget.isSwift(); +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. 
@@ -2344,6 +2356,229 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, return true; } +static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, + const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: { + const MCInstrDesc &Desc = MI->getDesc(); + int UOps = ItinData->getNumMicroOps(Desc.getSchedClass()); + assert(UOps >= 0 && "bad # UOps"); + return UOps; + } + + case ARM::LDRrs: + case ARM::LDRBrs: + case ARM::STRrs: + case ARM::STRBrs: { + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRH: + case ARM::STRH: { + if (!MI->getOperand(2).getReg()) + return 1; + + unsigned ShOpVal = MI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 1; + return 2; + } + + case ARM::LDRSB: + case ARM::LDRSH: + return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2; + + case ARM::LDRSB_POST: + case ARM::LDRSH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 
4 : 3; + } + + case ARM::LDR_PRE_REG: + case ARM::LDRB_PRE_REG: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rt == Rm) + return 3; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::STR_PRE_REG: + case ARM::STRB_PRE_REG: { + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 2; + return 3; + } + + case ARM::LDRH_PRE: + case ARM::STRH_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (!Rm) + return 2; + if (Rt == Rm) + return 3; + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) + ? 3 : 2; + } + + case ARM::LDR_POST_REG: + case ARM::LDRB_POST_REG: + case ARM::LDRH_POST: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + return (Rt == Rm) ? 
3 : 2; + } + + case ARM::LDR_PRE_IMM: + case ARM::LDRB_PRE_IMM: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_IMM: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRH_POST: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STR_PRE_IMM: + return 2; + + case ARM::LDRSB_PRE: + case ARM::LDRSH_PRE: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm == 0) + return 3; + unsigned Rt = MI->getOperand(0).getReg(); + if (Rt == Rm) + return 4; + unsigned ShOpVal = MI->getOperand(4).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + return 3; + return 4; + } + + case ARM::LDRD: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return (Rt == Rn) ? 3 : 2; + } + + case ARM::STRD: { + unsigned Rm = MI->getOperand(3).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3; + return 2; + } + + case ARM::LDRD_POST: + case ARM::t2LDRD_POST: + return 3; + + case ARM::STRD_POST: + case ARM::t2STRD_POST: + return 4; + + case ARM::LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return (Rt == Rn) ? 4 : 3; + } + + case ARM::t2LDRD_PRE: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(3).getReg(); + return (Rt == Rn) ? 
4 : 3; + } + + case ARM::STRD_PRE: { + unsigned Rm = MI->getOperand(4).getReg(); + if (Rm) + return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4; + return 3; + } + + case ARM::t2STRD_PRE: + return 3; + + case ARM::t2LDR_POST: + case ARM::t2LDRB_POST: + case ARM::t2LDRB_PRE: + case ARM::t2LDRSBi12: + case ARM::t2LDRSBi8: + case ARM::t2LDRSBpci: + case ARM::t2LDRSBs: + case ARM::t2LDRH_POST: + case ARM::t2LDRH_PRE: + case ARM::t2LDRSBT: + case ARM::t2LDRSB_POST: + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSH_POST: + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSHi12: + case ARM::t2LDRSHi8: + case ARM::t2LDRSHpci: + case ARM::t2LDRSHs: + return 2; + + case ARM::t2LDRDi8: { + unsigned Rt = MI->getOperand(0).getReg(); + unsigned Rn = MI->getOperand(2).getReg(); + return (Rt == Rn) ? 3 : 2; + } + + case ARM::t2STRB_POST: + case ARM::t2STRB_PRE: + case ARM::t2STRBs: + case ARM::t2STRDi8: + case ARM::t2STRH_POST: + case ARM::t2STRH_PRE: + case ARM::t2STRHs: + case ARM::t2STR_POST: + case ARM::t2STR_PRE: + case ARM::t2STRs: + return 2; + } +} + // Return the number of 32-bit words loaded by LDM or stored by STM. If this // can't be easily determined return 0 (missing MachineMemOperand). 
// @@ -2384,8 +2619,12 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); int ItinUOps = ItinData->getNumMicroOps(Class); - if (ItinUOps >= 0) + if (ItinUOps >= 0) { + if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) + return getNumMicroOpsSwiftLdSt(ItinData, MI); + return ItinUOps; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2454,7 +2693,43 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMIA_UPD: case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; - if (Subtarget.isCortexA8()) { + if (Subtarget.isSwift()) { + // rdar://8402126 + int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. + switch (Opc) { + default: break; + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + ++UOps; // One for base register writeback. + break; + case ARM::LDMIA_RET: + case ARM::tPOP_RET: + case ARM::t2LDMIA_RET: + UOps += 2; // One for base reg wb, one for write to pc. + break; + } + return UOps; + } else if (Subtarget.isCortexA8()) { if (NumRegs < 4) return 2; // 4 registers would be issued: 2, 2. 
@@ -2463,7 +2738,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, if (NumRegs % 2) ++A8UOps; return A8UOps; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2496,7 +2771,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = RegNo / 2 + 1; if (RegNo % 2) ++DefCycle; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = RegNo; bool isSLoad = false; @@ -2540,7 +2815,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, DefCycle = 1; // Result latency is issue cycle + 2: E2. DefCycle += 2; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { DefCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2571,7 +2846,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = RegNo / 2 + 1; if (RegNo % 2) ++UseCycle; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = RegNo; bool isSStore = false; @@ -2612,7 +2887,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, UseCycle = 2; // Read in E3. UseCycle += 2; - } else if (Subtarget.isLikeA9()) { + } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { UseCycle = (RegNo / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. @@ -2822,6 +3097,37 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget, break; } } + } else if (Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. 
+ switch (DefMCID->getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub; + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (!isSub && + (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))) + Adjust -= 2; + else if (!isSub && + ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Adjust; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) + Adjust -= 2; + break; + } + } } if (DefAlign < 8 && Subtarget.isLikeA9()) { @@ -2998,7 +3304,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr()) --Latency; } return Latency; @@ -3048,7 +3354,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, if (!UseNode->isMachineOpcode()) { int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); - if (Subtarget.isLikeA9()) + if (Subtarget.isLikeA9() || Subtarget.isSwift()) return Latency <= 2 ? 1 : Latency - 1; else return Latency <= 3 ? 1 : Latency - 2; @@ -3092,6 +3398,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; } } + } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) { + // FIXME: Properly handle all of the latency adjustments for address + // writeback. 
+ switch (DefMCID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + ((ShImm == 1 || ShImm == 2 || ShImm == 3) && + ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + Latency -= 2; + else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl 0-3 only. + Latency -= 2; + break; + } + } } if (DefAlign < 8 && Subtarget.isLikeA9()) @@ -3660,6 +3993,122 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { } +//===----------------------------------------------------------------------===// +// Partial register updates +//===----------------------------------------------------------------------===// +// +// Swift renames NEON registers with 64-bit granularity. That means any +// instruction writing an S-reg implicitly reads the containing D-reg. The +// problem is mostly avoided by translating f32 operations to v2f32 operations +// on D-registers, but f32 loads are still a problem. +// +// These instructions can load an f32 into a NEON register: +// +// VLDRS - Only writes S, partial D update. +// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. +// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. +// +// FCONSTD can be used as a dependency-breaking instruction. + + +unsigned ARMBaseInstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + // Only Swift has partial register update problems. 
+ if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + return 0; + + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + if (MO.readsReg()) + return 0; + unsigned Reg = MO.getReg(); + int UseOp = -1; + + switch(MI->getOpcode()) { + // Normal instructions writing only an S-register. + case ARM::VLDRS: + case ARM::FCONSTS: + case ARM::VMOVSR: + // rdar://problem/8791586 + case ARM::VMOVv8i8: + case ARM::VMOVv4i16: + case ARM::VMOVv2i32: + case ARM::VMOVv2f32: + case ARM::VMOVv1i64: + UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI); + break; + + // Explicitly reads the dependency. + case ARM::VLD1LNd32: + UseOp = 1; + break; + default: + return 0; + } + + // If this instruction actually reads a value from Reg, there is no unwanted + // dependency. + if (UseOp != -1 && MI->getOperand(UseOp).readsReg()) + return 0; + + // We must be able to clobber the whole D-reg. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Virtual register must be a foo:ssub_0<def,undef> operand. + if (!MO.getSubReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else if (ARM::SPRRegClass.contains(Reg)) { + // Physical register: MI must define the full D-reg. + unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0, + &ARM::DPRRegClass); + if (!DReg || !MI->definesRegister(DReg, TRI)) + return 0; + } + + // MI has an unwanted D-register dependency. + // Avoid defs in the previous N instructrions. + return SwiftPartialUpdateClearance; +} + +// Break a partial register dependency after getPartialRegUpdateClearance +// returned non-zero. 
+void ARMBaseInstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, + unsigned OpNum, + const TargetRegisterInfo *TRI) const { + assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def"); + assert(TRI && "Need TRI instance"); + + const MachineOperand &MO = MI->getOperand(OpNum); + unsigned Reg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Can't break virtual register dependencies."); + unsigned DReg = Reg; + + // If MI defines an S-reg, find the corresponding D super-register. + if (ARM::SPRRegClass.contains(Reg)) { + DReg = ARM::D0 + (Reg - ARM::S0) / 2; + assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken"); + } + + assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps"); + assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg"); + + // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines + // the full D-register by loading the same value to both lanes. The + // instruction is micro-coded with 2 uops, so don't do this until we can + // properly schedule micro-coded instuctions. The dispatcher stalls cause + // too big regressions. + + // Insert the dependency-breaking FCONSTD before MI. + // 96 is the encoding of 0.5, but the actual value doesn't matter here. 
+ AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(ARM::FCONSTD), DReg).addImm(96)); + MI->addRegisterKilled(DReg, TRI, true); +} + bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 304ccc087c..8f4f47b34f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -182,10 +182,13 @@ public: virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, const BranchProbability - &Probability) const { + &Probability) const { return NumCycles == 1; } + virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const; + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -235,6 +238,10 @@ public: getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr*, unsigned, + const TargetRegisterInfo*) const; + void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, + const TargetRegisterInfo *TRI) const; /// Get the number of addresses by LDM or VLDM or zero for unknown. 
unsigned getNumLDMAddresses(const MachineInstr *MI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 277dd57ef2..1cba45c3a5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -566,7 +566,7 @@ needsStackRealignment(const MachineFunction &MF) const { const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + F->getFnAttributes().hasStackAlignmentAttr()); return requiresRealignment && canRealignStack(MF); } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index d6ef3f333b..6b49e37e87 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -194,6 +194,7 @@ class ARMFastISel : public FastISel { unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT); // Call handling routines. private: @@ -648,6 +649,9 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { Align = TD.getTypeAllocSize(GV->getType()); } + if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_) + return ARMLowerPICELF(GV, Align, VT); + // Grab index. unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); @@ -2801,6 +2805,47 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, return true; } +unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, + unsigned Align, EVT VT) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolConstant *CPV = + ARMConstantPoolConstant::Create(GV, UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + unsigned Opc; + unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); + // Load value. + if (isThumb2) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci), DestReg1) + .addConstantPoolIndex(Idx)); + Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; + } else { + // The extra immediate is for addrmode2. + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(ARM::LDRcp), DestReg1) + .addConstantPoolIndex(Idx).addImm(0)); + Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs; + } + + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + if (GlobalBaseReg == 0) { + GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT)); + AFI->setGlobalBaseReg(GlobalBaseReg); + } + + unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), DestReg2) + .addReg(DestReg1) + .addReg(GlobalBaseReg); + if (!UseGOTOFF) + MIB.addImm(0); + AddOptionalDefs(MIB); + + return DestReg2; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2cedf3172c..52374ec4c1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1233,7 +1233,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasNakedAttr()) return; // We are planning to use NEON instructions vst1 / vld1. 
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a44e2a220a..90ae94b3b2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -347,7 +347,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9()) + + if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + !Subtarget->isSwift()) return true; if (!N->hasOneUse()) @@ -385,12 +387,13 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt) { - if (!Subtarget->isLikeA9()) + if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) return true; if (Shift.hasOneUse()) return true; // R << 2 is free. - return ShOpcVal == ARM_AM::lsl && ShAmt == 2; + return ShOpcVal == ARM_AM::lsl && + (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); } bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, @@ -518,7 +521,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, return false; // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && - (!Subtarget->isLikeA9() || N.hasOneUse())) { + ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -582,7 +585,8 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, // Try matching (R shl C) + (R). 
if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -630,7 +634,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && - (!Subtarget->isLikeA9() || N.hasOneUse())) { + (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -697,7 +701,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, } } - if (Subtarget->isLikeA9() && !N.hasOneUse()) { + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; Offset = CurDAG->getRegister(0, MVT::i32); @@ -753,7 +757,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, // Try matching (R shl C) + (R). if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isLikeA9() || N.getOperand(0).hasOneUse())) { + !(Subtarget->isLikeA9() || Subtarget->isSwift() || + N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2e7588b29f..556dacffcc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -645,9 +645,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - // These are expanded into libcalls. 
- if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { - // v7M has a hardware divider + if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && + !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { + // These are expanded into libcalls if the cpu doesn't have HW divider. setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } @@ -5873,7 +5873,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // ldrex dest, ptr // (sign extend dest, if required) // cmp dest, incr - // cmov.cond scratch2, dest, incr + // cmov.cond scratch2, incr, dest // strex scratch, scratch2, ptr // cmp scratch, #0 // bne- loopMBB @@ -5896,7 +5896,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(oldval).addReg(incr)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); if (strOpc == ARM::t2STREX) @@ -6605,7 +6605,7 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + if (!MF->getFunction()->getFnAttributes().hasNoImplicitFloatAttr() && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -9343,7 +9343,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, // See if we can use NEON instructions for this... 
if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat) && + !F->getFnAttributes().hasNoImplicitFloatAttr() && Subtarget->hasNEON()) { if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { return MVT::v4i32; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index c8966fb97a..67a6820932 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -846,6 +846,23 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, let Inst{3-0} = Rm; } +// Division instructions. +class ADivA1I<bits<3> opcod, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin, + opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-23} = 0b01110; + let Inst{22-20} = opcod; + let Inst{19-16} = Rd; + let Inst{15-12} = 0b1111; + let Inst{11-8} = Rm; + let Inst{7-4} = 0b0001; + let Inst{3-0} = Rn; +} + // PKH instructions def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; @@ -893,6 +910,10 @@ class ARMV5TPat<dag pattern, dag result> : Pat<pattern, result> { class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV5TE]; } +// ARMV5MOPat - Same as ARMV5TEPat with UseMulOps. 
+class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE, UseMulOps]; +} class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsARM, HasV6]; } diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 31b0c41f08..e62187727c 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -13,13 +13,17 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; @@ -84,3 +88,61 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } + +namespace { + /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for ARM ELF. 
+ struct ARMCGBR : public MachineFunctionPass { + static char ID; + ARMCGBR() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->getGlobalBaseReg() == 0) + return false; + + const ARMTargetMachine *TM = + static_cast<const ARMTargetMachine *>(&MF.getTarget()); + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + LLVMContext* Context = &MF.getFunction()->getContext(); + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, + "_GLOBAL_OFFSET_TABLE_"); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); + unsigned Align = TM->getTargetData()->getPrefTypeAlignment(GV->getType()); + unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); + + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? 
+ ARM::t2LDRpci : ARM::LDRcp; + const TargetInstrInfo &TII = *TM->getInstrInfo(); + MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, + TII.get(Opc), GlobalBaseReg) + .addConstantPoolIndex(Idx); + if (Opc == ARM::LDRcp) + MIB.addImm(0); + AddDefaultPred(MIB); + + return true; + } + + virtual const char *getPassName() const { + return "ARM PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char ARMCGBR::ID = 0; +FunctionPass* +llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2060bb9374..118c9ea5dd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -215,6 +215,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate<"FeatureHWDiv", "divide">; +def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, + AssemblerPredicate<"FeatureHWDivARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -250,6 +252,7 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +def UseMulOps : Predicate<"Subtarget->useMulOps()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. @@ -260,6 +263,20 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +// VGETLNi32 is microcoded on Swift - prefer VMOV. 
+def HasFastVGETLNi32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVGETLNi32 : Predicate<"Subtarget->isSwift()">; + +// VDUP.32 is microcoded on Swift - prefer VMOV. +def HasFastVDUP32 : Predicate<"!Subtarget->isSwift()">; +def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; + +// Cortex-A9 prefers VMOVSR to VMOVDRR even when using NEON for scalar FP, as +// this allows more effective execution domain optimization. See +// setExecutionDomain(). +def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; + def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; @@ -3593,13 +3610,13 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6, UseMulOps]>; } def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6, UseMulOps]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3615,7 +3632,7 @@ def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2, UseMulOps]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3721,7 +3738,7 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLAR : AMul2Ia 
<0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3731,7 +3748,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6, UseMulOps]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), @@ -3785,7 +3802,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3793,7 +3810,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3801,7 +3818,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3809,7 +3826,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (opnode (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3817,7 +3834,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, - 
Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -3825,7 +3842,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { [(set GPRnopc:$Rd, (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -3928,6 +3945,19 @@ defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// +// Division Instructions (ARMv7-A with virtualization extension) +// +def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, + "udiv", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasDivideInARM]>; + +//===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. 
// @@ -4989,32 +5019,32 @@ def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), (sra (shl GPR:$b, (i32 16)), (i32 16)))), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, +def : ARMV5MOPat<(add GPR:$acc, (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1bcb48776e..de655f1a0e 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5043,7 +5043,8 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), - imm:$lane))]> { + imm:$lane))]>, + Requires<[HasNEON, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } 
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5066,7 +5067,16 @@ def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane))>; + (SubReg_i32_lane imm:$lane))>, + Requires<[HasNEON, HasFastVGETLNi32]>; +def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (COPY_TO_REGCLASS + (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>, + Requires<[HasNEON, HasSlowVGETLNi32]>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; @@ -5175,14 +5185,23 @@ class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>, + Requires<[HasNEON, HasFastVDUP32]>; def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +// NEONvdup patterns for uarchs with fast VDUP.32. +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>, + Requires<[HasNEON,HasFastVDUP32]>; def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; +// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead. 
+def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>, + Requires<[HasNEON,HasSlowVDUP32]>; + // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, @@ -5619,6 +5638,11 @@ def : N2VSPat<arm_ftoui, VCVTf2ud>; def : N2VSPat<arm_sitof, VCVTs2fd>; def : N2VSPat<arm_uitof, VCVTu2fd>; +// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers. +def : Pat<(f32 (bitconvert GPR:$a)), + (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>, + Requires<[HasNEON, DontUseVMOVSR]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2bb667ef37..37b280f447 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2396,7 +2396,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2MLA: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> { + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2406,7 +2407,8 @@ def t2MLA: T2FourReg< def t2MLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> { + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>, + Requires<[IsThumb2, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -2475,7 +2477,7 @@ def t2SMMLA : T2FourReg< (outs 
rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2496,7 +2498,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2601,7 +2603,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2614,7 +2616,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2627,7 +2629,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2640,7 +2642,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra 
rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2653,7 +2655,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2666,7 +2668,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2760,7 +2762,7 @@ def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), // Division Instructions. 
// Signed and unsigned division on v7-M // -def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { @@ -2771,7 +2773,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, Requires<[HasDivide, IsThumb2]> { diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 7d6692f307..b5a896c699 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -523,10 +523,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let D = VFPNeonDomain; } +// Bitcast i32 -> f32. NEON prefers to use VMOVDRR. def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", - [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, + Requires<[HasVFP2, UseVMOVSR]> { // Instruction operands. bits<5> Sn; bits<4> Rt; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index f1c8fc8481..c0ac04b600 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -108,6 +108,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. DenseMap<unsigned, unsigned> CPEClones; + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. 
+ unsigned GlobalBaseReg; + public: ARMFunctionInfo() : isThumb(false), @@ -119,7 +124,7 @@ public: GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -130,7 +135,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false) {} + VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -249,6 +254,9 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) assert(0 && "Duplicate entries!"); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6f974fd17d..ed8ac1aff7 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -247,11 +247,16 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { } // Scalar single precision floating point register class.. -def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; +// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to +// avoid partial-write dependencies on D registers (S registers are +// renamed as portions of D registers). 
+def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate + (sequence "S%u", 0, 31), 2), + (sequence "S%u", 0, 31))>; // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; // Scalar double precision floating point / generic 64-bit vector register // class. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 81d2fa37c2..02196d06bf 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -55,6 +55,7 @@ def IIC_iMUL32 : InstrItinClass; def IIC_iMAC32 : InstrItinClass; def IIC_iMUL64 : InstrItinClass; def IIC_iMAC64 : InstrItinClass; +def IIC_iDIV : InstrItinClass; def IIC_iLoad_i : InstrItinClass; def IIC_iLoad_r : InstrItinClass; def IIC_iLoad_si : InstrItinClass; @@ -261,3 +262,4 @@ def IIC_VTBX4 : InstrItinClass; include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" +include "ARMScheduleSwift.td" diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td new file mode 100644 index 0000000000..e9bc3e0f39 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -0,0 +1,1085 @@ +//=- ARMScheduleSwift.td - Swift Scheduling Definitions -*- tablegen -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Swift processor.. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// This section contains legacy support for itineraries. This is +// required until SD and PostRA schedulers are replaced by MachineScheduler. 
+ +def SW_DIS0 : FuncUnit; +def SW_DIS1 : FuncUnit; +def SW_DIS2 : FuncUnit; + +def SW_ALU0 : FuncUnit; +def SW_ALU1 : FuncUnit; +def SW_LS : FuncUnit; +def SW_IDIV : FuncUnit; +def SW_FDIV : FuncUnit; + +// FIXME: Need bypasses. +// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and +// IIC_iMOVix2ld better. +// FIXME: Model the special immediate shifts that are not microcoded. +// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it +// to issue on pipe 1? +// FIXME: Model the pipelined behavior of CMP / TST instructions. +// FIXME: Better model the microcode stages of multiply instructions, especially +// conditional variants. +// FIXME: Add preload instruction when it is documented. +// FIXME: Model non-pipelined nature of FP div / sqrt unit. + +def SwiftItineraries : ProcessorItineraries< + [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + 
InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_LS]>], + [5]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // 
Unary Instructions that produce a result + + // CLZ, RBIT, etc. + InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Move instructions, conditional + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 3>, + InstrStage<1, [SW_ALU0]>], + [5, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 6, 1, 1]>, + // + // Integer divide + InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 0>, + InstrStage<14, [SW_IDIV]>], + [14, 1, 1]>, + + // Integer load pipeline + // FIXME: The timings are some rough 
approximations + // + // Immediate offset + InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, 
[SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. + InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Load multiple plus branch + InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Pop, def is the 3rd operand. 
+ InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + // + // Pop + branch, def is the 3rd operand. + InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + + // + // iLoadi + iALUr for t2LDRpci_pic. + InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [4, 1]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Register offset + InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 
2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [], [], -1>, // dynamic uops + // + // Store multiple + update + InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 
0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [2], [], -1>, // dynamic uops + + // + // Preload + InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>, + + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // Single-precision FP Unary + // + // Most floating-point moves get issued on ALU0. + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 4>, + InstrStage<1, [SW_ALU1]>], + [6, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, 
[SW_ALU1]>], + [4, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision FP DIV + 
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // FP Load Multiple + // FIXME: Assumes a single Q register. 
+ InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 4], [], -1>, // dynamic uops + // + // FP Load Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 4], [], -1>, // dynamic uops + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Double-precision FP Store + InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // FP Store Multiple + // FIXME: Assumes a single Q register. + InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [1, 1, 1], [], -1>, // dynamic uops + // + // FP Store Multiple + update + // FIXME: Assumes a single Q register. 
+ InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1], [], -1>, // dynamic uops + // NEON + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer 
Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Count + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 
1, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Quad-register Permute Move + InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + 
InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [4, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Integer to Lane Move + // FIXME: I think this is correct, but it is not clear from the tuning guide. + InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + + // + // Vector narrow move + InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Double-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Quad-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions. + InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // VPADD, etc. 
+ InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register FP VMUL + InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register FP Binary + InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register FP VMUL + InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FP Multiple-Accumulate + InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FusedF P Multiple-Accumulate + InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Permute + // FIXME: The latencies are unclear from the documentation. + InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute + // FIXME: The latencies are unclear from the documentation. 
+ InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute (3 cycle issue on A9) + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, 
[SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]> +]>; + +// ===---------------------------------------------------------------------===// +// This following definitions describe the simple machine model which +// will replace itineraries. + +// Swift machine model for scheduling and other instruction cost heuristics. +def SwiftModel : SchedMachineModel { + let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. + let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let LoadLatency = 3; + + let Itineraries = SwiftItineraries; +} + +// TODO: Add Swift processor and scheduler resources. diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c8aa0779bc..6562600202 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -41,6 +41,10 @@ NoInlineJumpTables("no-inline-jumptables", // @LOCALMOD-END static cl::opt<bool> +UseFusedMulOps("arm-use-mulops", + cl::init(true), cl::Hidden); + +static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); @@ -59,6 +63,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv4(false) , HasNEON(false) , UseNEONForSinglePrecisionFP(false) + , UseMulOps(UseFusedMulOps) , SlowFPVMLx(false) , HasVMLxForwarding(false) , SlowFPBrcc(false) @@ -74,6 +79,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasFP16(false) , HasD16(false) , HasHardwareDivide(false) + , HasHardwareDivideInARM(false) , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) diff --git 
a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 0a5744e5c1..64081f5be2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -38,7 +38,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9, CortexA15 + Others, CortexA8, CortexA9, CortexA15, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -65,6 +65,10 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// UseMulOps - True if non-microcoded fused integer multiply-add and + /// multiply-subtract instructions should be used. + bool UseMulOps; + /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; @@ -115,6 +119,9 @@ protected: /// HasHardwareDivide - True if subtarget supports [su]div bool HasHardwareDivide; + /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode + bool HasHardwareDivideInARM; + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack /// instructions. 
bool HasT2ExtractPack; @@ -214,6 +221,7 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } + bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } bool isLikeA9() const { return isCortexA9() || isCortexA15(); } @@ -227,8 +235,10 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } + bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4675c98f0d..ac5f14c09c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -150,6 +150,11 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); + + const ARMSubtarget *Subtarget = &getARMSubtarget(); + if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() && + TM->Options.EnableFastISel) + addPass(createARMGlobalBaseRegPass()); return false; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bc711dc35f..aa5ba46ab2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -257,21 +257,11 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - unsigned 
checkTargetMatchPredicate(MCInst &Inst); bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); - - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, NumMCOperands); - } }; } // end anonymous namespace @@ -5676,6 +5666,20 @@ bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Alias for alternate form of 'ADR Rd, #imm' instruction. + case ARM::ADDri: { + if (Inst.getOperand(1).getReg() != ARM::PC || + Inst.getOperand(5).getReg() != 0) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADR); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } // Aliases for alternate PC+imm syntax of LDR instructions. 
case ARM::t2LDRpcrel: Inst.setOpcode(ARM::t2LDRpci); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 34c79f945f..dfc424cda2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -714,6 +714,15 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef else if (TheTriple.getArchName() == "armv6" || TheTriple.getArchName() == "thumbv6") return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); + else if (TheTriple.getArchName() == "armv7f" || + TheTriple.getArchName() == "thumbv7f") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); + else if (TheTriple.getArchName() == "armv7k" || + TheTriple.getArchName() == "thumbv7k") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); + else if (TheTriple.getArchName() == "armv7s" || + TheTriple.getArchName() == "thumbv7s") + return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e581cc82fa..406317cee4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -71,6 +71,14 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { else // Use CPU to figure out the exact features. ARMArchFeature = "+v7"; + } else if (Len >= Idx+2 && TT[Idx+1] == 's') { + if (NoCPU) + // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk + // Swift + ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+t2xtpk"; + else + // Use CPU to figure out the exact features. + ARMArchFeature = "+v7"; } else { // v7 CPUs have lots of different feature sets. If no CPU is specified, // then assume v7a (e.g. cortex-a8) feature set. 
Otherwise, return diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 95640f7df9..2154c93176 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -41,6 +41,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); + bool requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, const MCSymbolData *SD, + uint64_t FixedValue); + public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) @@ -305,6 +311,46 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } +bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCFragment &Fragment, + unsigned RelocType, + const MCSymbolData *SD, + uint64_t FixedValue) { + // Most cases can be identified purely from the symbol. + if (Writer->doesSymbolRequireExternRelocation(SD)) + return true; + int64_t Value = (int64_t)FixedValue; // The displacement is signed. + int64_t Range; + switch (RelocType) { + default: + return false; + case macho::RIT_ARM_Branch24Bit: + // PC pre-adjustment of 8 for these instructions. + Value -= 8; + // ARM BL/BLX has a 25-bit offset. + Range = 0x1ffffff; + break; + case macho::RIT_ARM_ThumbBranch22Bit: + // PC pre-adjustment of 4 for these instructions. + Value -= 4; + // Thumb BL/BLX has a 24-bit offset. + Range = 0xffffff; + } + // BL/BLX also use external relocations when an internal relocation + // would result in the target being out of range. This gives the linker + // enough information to generate a branch island. 
+ const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Value += Writer->getSectionAddress(&SymSD); + Value -= Writer->getSectionAddress(Fragment.getParent()); + // If the resultant value would be out of range for an internal relocation, + // use an external instead. + if (Value > Range || Value < -(Range + 1)) + return true; + return false; +} + void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -373,7 +419,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Check whether we need an external or internal relocation. - if (Writer->doesSymbolRequireExternRelocation(SD)) { + if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, + FixedValue)) { IsExtern = 1; Index = SD->getIndex(); diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 4ebba0e4d3..70643bcda3 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -52,6 +52,7 @@ namespace { MachineRegisterInfo *MRI; bool isLikeA9; + bool isSwift; unsigned MIIdx; MachineInstr* LastMIs[4]; SmallPtrSet<MachineInstr*, 4> IgnoreStall; @@ -60,6 +61,7 @@ namespace { void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; + bool hasLoopHazard(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, @@ -135,6 +137,50 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { return Reg; } +/// hasLoopHazard - Check whether an MLx instruction is chained to itself across +/// a single-MBB loop. 
+bool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + while (true) { +outer_continue: + if (DefMI->getParent() != MBB) + break; + + if (DefMI->isPHI()) { + for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { + if (DefMI->getOperand(i + 1).getMBB() == MBB) { + unsigned SrcReg = DefMI->getOperand(i).getReg(); + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DefMI = MRI->getVRegDef(SrcReg); + goto outer_continue; + } + } + } + } else if (DefMI->isCopyLike()) { + Reg = DefMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } else if (DefMI->isInsertSubreg()) { + Reg = DefMI->getOperand(2).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + DefMI = MRI->getVRegDef(Reg); + continue; + } + } + + break; + } + + return DefMI == MI; +} + bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); @@ -149,6 +195,19 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { return false; } +static bool isFpMulInstruction(unsigned Opcode) { + switch (Opcode) { + case ARM::VMULS: + case ARM::VMULfd: + case ARM::VMULfq: + case ARM::VMULD: + case ARM::VMULslfd: + case ARM::VMULslfq: + return true; + default: + return false; + } +} bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) @@ -171,6 +230,12 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { return true; } + // On Swift, we mostly care about hazards from multiplication instructions + // writing the accumulator and the pipelining of loop iterations by out-of- + // order execution. 
+ if (isSwift) + return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); + if (IgnoreStall.count(MI)) return false; @@ -316,7 +381,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); - isLikeA9 = STI->isLikeA9(); + isLikeA9 = STI->isLikeA9() || STI->isSwift(); + isSwift = STI->isSwift(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 703a128ee0..1c891f14d8 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1350,6 +1350,8 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } else { setOperationAction(ISD::BR_JT, MVT::Other, Expand); } + // Increase jump tables cutover to 5, was 4. + setMinimumJumpTableEntries(5); setOperationAction(ISD::BR_CC, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index daceb88076..9e22fd06d1 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -44,10 +44,6 @@ class MBlazeAsmParser : public MCTargetAsmParser { bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -60,13 +56,6 @@ class MBlazeAsmParser : public MCTargetAsmParser { /// } - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, - NumMCOperands); - } - public: 
MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) : MCTargetAsmParser(), Parser(_Parser) {} diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4cbd4c8e12..b1ada100f4 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -41,10 +41,6 @@ class MipsAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -62,11 +58,6 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); - unsigned - getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -265,18 +256,6 @@ public: }; } -unsigned MipsAsmParser:: -getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - assert (0 && "getMCInstOperandNum() not supported by the Mips target."); - // The Mips backend doesn't currently include the matcher implementation, so - // the getMCInstOperandNumImpl() is undefined. This is a temporary - // work around. 
- NumMCOperands = 0; - return 0; -} - bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 0f84358e26..7dec066fb6 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index aa5747209b..82dbcc5bcf 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -108,6 +108,11 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -138,6 +143,11 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -346,6 +356,13 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeCPURegsRegisterClass(Inst, RegNo, Address, Decoder); +} + static DecodeStatus 
DecodeFGR64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -463,6 +480,18 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo >= 4) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 96033276d2..233214b461 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -122,14 +122,16 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: case Mips::FCC0: + case Mips::D0: case Mips::FCC0: case Mips::AC0: return 0; case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: + case Mips::AC1: return 1; case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: - case Mips::D1: + case Mips::D1: case Mips::AC2: return 2; case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: + case Mips::AC3: return 3; case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: case Mips::D2: diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 93de517316..bd8c517345 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ - MipsGenAsmMatcher.inc + MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc DIRS = InstPrinter 
Disassembler AsmParser TargetInfo MCTargetDesc diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 9248032340..127c5b89e8 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -84,7 +84,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + unsigned Opc = 0; + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::SwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } void Mips16InstrInfo:: @@ -92,7 +100,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "Implement this function."); + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) + Opc = Mips::LwRxSpImmX16; + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); } bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index b0ab464a68..b866a5d225 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -29,10 +29,35 @@ class FI8_MOVR3216_ins<string asmstr, InstrItinClass itin>: // // I8_MOV32R instruction format (used only by MOV32R instruction) // + class 
FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>: FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz), !strconcat(asmstr, "\t$r32, $rz"), [], itin>; + +// +// RR-type instruction format +// + +class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRxRxRy16_ins<bits<5> f, string asmstr, + InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rz, $ry"), + [], itin> { + let Constraints = "$rx = $rz"; +} + +let rx=0 in +class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, + string asmstr, InstrItinClass itin>: + FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"), + [], itin> ; + // // EXT-RI instruction format // @@ -56,30 +81,14 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr, !strconcat(asmstr, "\t$rx, $imm"), [], itin> { let Constraints = "$rx_ = $rx"; } - - -// -// RR-type instruction format -// - -class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), - !strconcat(asmstr, "\t$rx, $ry"), [], itin> { -} - -class FRxRxRy16_ins<bits<5> f, string asmstr, - InstrItinClass itin> : - FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), - !strconcat(asmstr, "\t$rz, $ry"), - [], itin> { - let Constraints = "$rx = $rz"; +// this has an explicit sp argument that we ignore to work around a problem +// in the compiler +class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPUSPReg:$ry, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm ( $ry ); "), [], itin> { } -let rx=0 in -class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, - string asmstr, InstrItinClass itin>: - FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"), - [], itin> ; // // EXT-RRI instruction format @@ -122,6 +131,13 @@ class 
ArithLogic16Defs<bit isCom=0> { bit neverHasSideEffects = 1; } +class MayLoad { + bit mayLoad = 1; +} + +class MayStore { + bit mayStore = 1; +} // // Format: ADDIU rx, immediate MIPS16e @@ -169,28 +185,30 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>; // Purpose: Load Byte (Extended) // To load a byte from memory as a signed value. // -def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>; +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IILoad>, MayLoad; // // Format: LBU ry, offset(rx) MIPS16e // Purpose: Load Byte Unsigned (Extended) // To load a byte from memory as a unsigned value. // -def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>; +def LbuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IILoad>, MayLoad; // // Format: LH ry, offset(rx) MIPS16e // Purpose: Load Halfword signed (Extended) // To load a halfword from memory as a signed value. // -def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>; +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IILoad>, MayLoad; // // Format: LHU ry, offset(rx) MIPS16e // Purpose: Load Halfword unsigned (Extended) // To load a halfword from memory as an unsigned value. // -def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>; +def LhuRxRyOffMemX16: + FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IILoad>, MayLoad; // // Format: LI rx, immediate MIPS16e @@ -204,7 +222,13 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; + +// Format: LW rx, offset(sp) MIPS16e +// Purpose: Load Word (SP-Relative, Extended) +// To load an SP-relative word from memory as a signed value. 
+// +def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad; // // Format: MOVE r32, rz MIPS16e @@ -257,7 +281,7 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; let ra=1, s=0,s0=1,s1=1 in def RestoreRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > { + "restore \t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad { let isCodeGenOnly = 1; } @@ -271,7 +295,7 @@ def RestoreRaF16: let ra=1, s=1,s0=1,s1=1 in def SaveRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "save \t$$ra, $$s0, $$s1, $frame_size", [], IILoad > { + "save \t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore { let isCodeGenOnly = 1; } // @@ -279,14 +303,16 @@ def SaveRaF16: // Purpose: Store Byte (Extended) // To store a byte to memory. // -def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>; +def SbRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore; // // Format: SH ry, offset(rx) MIPS16e // Purpose: Store Halfword (Extended) // To store a halfword to memory. // -def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>; +def ShRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIStore>, MayStore; // // Format: SLL rx, ry, sa MIPS16e @@ -350,9 +376,18 @@ def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; // Purpose: Store Word (Extended) // To store a word to memory. // -def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>; +def SwRxRyOffMemX16: + FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIStore>, MayStore; // +// Format: SW rx, offset(sp) MIPS16e +// Purpose: Store Word rx (SP-Relative) +// To store an SP-relative word to memory. +// +def SwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b11010, "sw", IIStore>, MayStore; + +// +// // Format: XOR rx, ry MIPS16e // Purpose: Xor // To do a bitwise logical XOR. 
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 106e82fd38..bfc6b6cabf 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -57,7 +57,6 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); int MinCSFI = 0; @@ -77,8 +76,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // getFrameRegister() returns. unsigned FrameReg; - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -94,12 +92,8 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // incoming argument, callee-saved register location or local variable. 
int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 729b7921b4..1bf4a542d8 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -50,6 +50,13 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return true; } +bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { + MCOp = MCInstLowering.LowerOperand(MO); + return MCOp.isValid(); +} + +#include "MipsGenMCPseudoLowering.inc" + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MI->isDebugValue()) { SmallString<128> Str; @@ -59,6 +66,10 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(OutStreamer, MI)) + return; + MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index a426f55ba7..efed6357a4 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -32,6 +32,14 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { void EmitInstrWithMacroNoAT(const MachineInstr *MI); +private: + // tblgen'erated function. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + public: const MipsSubtarget *Subtarget; diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index d9bcccc617..8e01d06596 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -23,3 +23,287 @@ def REGIMM_OPCODE : Field6<0b000001>; class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { let Predicates = [HasDSP]; } + +class PseudoDSP<dag outs, dag ins, list<dag> pattern>: + MipsPseudo<outs, ins, "", pattern> { + let Predicates = [HasDSP]; +} + +// ADDU.QB sub-class format. +class ADDU_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +class RADDU_W_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010000; +} + +// CMPU.EQ.QB sub-class format. +class CMP_EQ_QB_R2_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = 0; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class CMP_EQ_QB_R3_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> rd; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +class PRECR_SRA_PH_W_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<5> rt; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b010001; +} + +// ABSQ_S.PH sub-class format. 
+class ABSQ_S_PH_R2_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + + +class REPL_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> imm; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = imm; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010010; +} + +// SHLL.QB sub-class format. +class SHLL_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rt; + bits<5> rs_sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs_sa; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b010011; +} + +// LX sub-class format. +class LX_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> base; + bits<5> index; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = base; + let Inst{20-16} = index; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b001010; +} + +// ADDUH.QB sub-class format. +class ADDUH_QB_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b011000; +} + +// APPEND sub-class format. +class APPEND_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = sa; + let Inst{10-6} = op; + let Inst{5-0} = 0b110001; +} + +// DPA.W.PH sub-class format. +class DPA_W_PH_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b110000; +} + +// MULT sub-class format. 
+class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst { + bits<2> ac; + bits<5> rs; + bits<5> rt; + + let Opcode = opcode; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +// EXTR.W sub-class format (type 1). +class EXTR_W_TY1_FMT<bits<5> op> : DSPInst { + bits<5> rt; + bits<2> ac; + bits<5> shift_rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = shift_rs; + let Inst{20-16} = rt; + let Inst{15-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +// SHILO sub-class format. +class SHILO_R1_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<6> shift; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-20} = shift; + let Inst{19-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class SHILO_R2_FMT<bits<5> op> : DSPInst { + bits<2> ac; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-13} = 0; + let Inst{12-11} = ac; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class RDDSP_FMT<bits<5> op> : DSPInst { + bits<5> rd; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-16} = mask; + let Inst{15-11} = rd; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class WRDSP_FMT<bits<5> op> : DSPInst { + bits<5> rs; + bits<10> mask; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-11} = mask; + let Inst{10-6} = op; + let Inst{5-0} = 0b111000; +} + +class BPOSGE32_FMT<bits<5> op> : DSPInst { + bits<16> offset; + + let Opcode = REGIMM_OPCODE.V; + + let Inst{25-21} = 0; + let Inst{20-16} = op; + let Inst{15-0} = offset; +} + +// INSV sub-class format. 
+class INSV_FMT<bits<6> op> : DSPInst { + bits<5> rt; + bits<5> rs; + + let Opcode = SPECIAL3_OPCODE.V; + + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-6} = 0; + let Inst{5-0} = op; +} diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 1a4fd8733a..ef9402865b 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -18,3 +18,1302 @@ def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>; def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>; def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; + +// Mips-specific dsp nodes +def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; +def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; + +class MipsDSPBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + +class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> : + SDNode<!strconcat("MipsISD::", Opc), Prof, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>; + +def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; +def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; +def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>; +def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>; +def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; +def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; + +def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; +def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>; + +def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; +def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; +def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHL : 
MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>; +def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>; + +def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>; +def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>; +def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>; +def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>; +def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>; +def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>; +def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>; +def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>; + +def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>; +def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>; +def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>; +def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>; +def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>; +def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>; +def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>; +def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>; +def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>; + +def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>; +def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>; +def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; +def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; +def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; +def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; + +// Flags. +class IsCommutable { + bit isCommutable = 1; +} + +class UseAC { + list<Register> Uses = [AC0]; +} + +class UseDSPCtrl { + list<Register> Uses = [DSPCtrl]; +} + +class ClearDefs { + list<Register> Defs = []; +} + +// Instruction encoding. 
+class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>; +class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>; +class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>; +class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>; +class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>; +class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>; +class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>; +class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>; +class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>; +class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>; +class ADDSC_ENC : ADDU_QB_FMT<0b10000>; +class ADDWC_ENC : ADDU_QB_FMT<0b10001>; +class MODSUB_ENC : ADDU_QB_FMT<0b10010>; +class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>; +class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>; +class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>; +class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>; +class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>; +class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>; +class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>; +class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>; +class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>; +class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>; +class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>; +class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>; +class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>; +class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>; +class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>; +class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>; +class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>; +class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>; +class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>; +class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>; +class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>; +class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>; +class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>; +class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>; +class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>; +class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>; +class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>; +class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>; +class SHRAV_R_PH_ENC : 
SHLL_QB_FMT<0b01111>; +class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>; +class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>; +class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>; +class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>; +class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>; +class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>; +class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>; +class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>; +class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>; +class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>; +class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; +class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; +class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>; +class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; +class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; +class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; +class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; +class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>; +class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>; +class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>; +class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>; +class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>; +class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>; +class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>; +class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>; +class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>; +class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>; +class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>; +class CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>; +class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>; +class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>; +class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>; +class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>; +class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>; +class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>; +class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>; +class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>; +class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>; +class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>; +class REPL_QB_ENC : REPL_FMT<0b00010>; +class REPL_PH_ENC : 
REPL_FMT<0b01010>; +class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>; +class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>; +class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>; +class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>; +class LWX_ENC : LX_FMT<0b00000>; +class LHX_ENC : LX_FMT<0b00100>; +class LBUX_ENC : LX_FMT<0b00110>; +class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>; +class INSV_ENC : INSV_FMT<0b001100>; + +class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>; +class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>; +class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>; +class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>; +class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>; +class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>; +class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>; +class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>; +class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>; +class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>; +class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>; +class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>; +class SHILO_ENC : SHILO_R1_FMT<0b11010>; +class SHILOV_ENC : SHILO_R2_FMT<0b11011>; +class MTHLIP_ENC : SHILO_R2_FMT<0b11111>; + +class RDDSP_ENC : RDDSP_FMT<0b10010>; +class WRDSP_ENC : WRDSP_FMT<0b10011>; +class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>; +class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>; +class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>; +class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>; +class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>; +class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>; +class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>; +class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>; +class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>; +class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>; +class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>; +class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>; +class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>; +class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>; +class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>; +class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>; +class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>; +class ADDQH_R_W_ENC : 
ADDUH_QB_FMT<0b10010>; +class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>; +class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>; +class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>; +class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>; +class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>; +class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>; +class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>; +class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>; +class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>; +class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>; +class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>; +class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>; +class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>; +class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>; +class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>; +class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>; +class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>; +class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>; +class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>; +class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>; +class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>; +class SHRA_R_QB_ENC : SHLL_QB_FMT<0b00101>; +class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>; +class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>; +class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>; +class APPEND_ENC : APPEND_FMT<0b00000>; +class BALIGN_ENC : APPEND_FMT<0b10000>; +class PREPEND_ENC : APPEND_FMT<0b00001>; + +// Instruction desc. 
+class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCS, + RegisterClass RCT = RCS> { + dag OutOperandList = (outs); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); + list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS, RegisterClass RCT = RCS> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCT, + RegisterClass RCS = RCT> { + dag OutOperandList = (outs RCT:$rt); + dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src); + string AsmString = 
!strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCT = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins uimm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); + list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, CPURegs:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmPat, InstrItinClass itin, + RegisterClass RC> { + dag OutOperandList = (outs RC:$rd); + dag InOperandList = (ins RC:$rt, uimm16:$rs_sa); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); + list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag 
OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins CPURegs:$base, CPURegs:$index); + string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); + list<dag> Pattern = [(set CPURegs:$rd, + (OpNode CPURegs:$base, CPURegs:$index))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + bit mayLoad = 1; +} + +class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin, RegisterClass RCD, + RegisterClass RCS = RCD, RegisterClass RCT = RCD> { + dag OutOperandList = (outs RCD:$rd); + dag InOperandList = (ins RCS:$rs, RCT:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + SDPatternOperator ImmOp, InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$rs, shamt:$sa, CPURegs:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); + list<dag> Pattern = [(set CPURegs:$rt, + (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + +class 
SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>, + PseudoInstExpansion<(realinst AC0, simm16:$shift)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R1_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins simm16:$shift); + string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); +} + +class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class SHILO_R2_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); +} + +class MTHLIP_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); +} + +class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rd); + dag InOperandList = (ins uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); + list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; +} + +class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins CPURegs:$rs, uimm16:$mask); + string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); + list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)]; + InstrItinClass Itinerary = itin; + list<Register> Defs = [DSPCtrl]; +} + 
+class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + list<Register> Uses = [AC0]; + InstrItinClass Itinerary = itin; +} + +class DPA_W_PH_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, + Instruction realinst> : + PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), + [(OpNode CPURegs:$rs, CPURegs:$rt)]>, + PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { + list<Register> Defs = [DSPCtrl, AC0]; + InstrItinClass Itinerary = itin; +} + +class MULT_DESC_BASE<string instr_asm> { + dag OutOperandList = (outs ACRegs:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); +} + +class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : + MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> { + list<Register> Uses = [DSPCtrl]; + bit usesCustomInserter = 1; +} + +class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> { + dag OutOperandList = (outs); + dag InOperandList = (ins brtarget:$offset); + string AsmString = !strconcat(instr_asm, "\t$offset"); + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + bit isBranch = 1; + bit isTerminator = 1; + bit hasDelaySlot = 1; +} + +class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs CPURegs:$rt); + dag InOperandList = (ins CPURegs:$src, CPURegs:$rs); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); + list<dag> Pattern = [(set CPURegs:$rt, (OpNode 
CPURegs:$src, CPURegs:$rs))]; + InstrItinClass Itinerary = itin; + list<Register> Uses = [DSPCtrl]; + string Constraints = "$src = $rt"; +} + +//===----------------------------------------------------------------------===// +// MIPS DSP Rev 1 +//===----------------------------------------------------------------------===// + +// Addition/subtraction +class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary, + DSPRegs, DSPRegs>, IsCommutable; + +class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary, + DSPRegs, DSPRegs>; + +class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, + NoItinerary, DSPRegs, DSPRegs>; + +class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, + NoItinerary, CPURegs, CPURegs>, + IsCommutable; + +class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, + NoItinerary, CPURegs, CPURegs>; + +class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary, + CPURegs, CPURegs>, IsCommutable; + +class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary, + CPURegs, CPURegs>, + IsCommutable, UseDSPCtrl; + +class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, + CPURegs, CPURegs>, ClearDefs; + +class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +// 
Absolute value +class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, + NoItinerary, DSPRegs>; + +class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, + NoItinerary, CPURegs>; + +// Precision reduce/expand +class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", + int_mips_precrq_qb_ph, + NoItinerary, DSPRegs, DSPRegs>, + ClearDefs; + +class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", + int_mips_precrq_ph_w, + NoItinerary, DSPRegs, CPURegs>, + ClearDefs; + +class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", + int_mips_precrq_rs_ph_w, + NoItinerary, DSPRegs, + CPURegs>; + +class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", + int_mips_precrqu_s_qb_ph, + NoItinerary, DSPRegs, + DSPRegs>; + +class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", + int_mips_preceq_w_phl, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", + int_mips_preceq_w_phr, + NoItinerary, CPURegs, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", + int_mips_precequ_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", + int_mips_precequ_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", + int_mips_precequ_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", + int_mips_precequ_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", + int_mips_preceu_ph_qbl, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", + int_mips_preceu_ph_qbr, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", + 
int_mips_preceu_ph_qbla, + NoItinerary, DSPRegs>, + ClearDefs; + +class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", + int_mips_preceu_ph_qbra, + NoItinerary, DSPRegs>, + ClearDefs; + +// Shift +class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3, + NoItinerary, DSPRegs>; + +class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, + NoItinerary, DSPRegs>; + +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, + NoItinerary, DSPRegs>; + +class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, + NoItinerary, DSPRegs>; + +class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, + immZExt4, NoItinerary, DSPRegs>; + +class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, + NoItinerary, DSPRegs>; + +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, + immZExt4, NoItinerary, DSPRegs>, + ClearDefs; + +class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, + NoItinerary, DSPRegs>, ClearDefs; + +class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, + immZExt5, NoItinerary, CPURegs>; + +class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, + NoItinerary, CPURegs>; + +class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, + immZExt5, NoItinerary, CPURegs>, + ClearDefs; + +class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, + NoItinerary, 
CPURegs>; + +// Multiplication +class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", + int_mips_muleu_s_ph_qbl, + NoItinerary, DSPRegs, DSPRegs>; + +class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", + int_mips_muleu_s_ph_qbr, + NoItinerary, DSPRegs, DSPRegs>; + +class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", + int_mips_muleq_s_w_phl, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", + int_mips_muleq_s_w_phr, + NoItinerary, CPURegs, DSPRegs>, + IsCommutable; + +class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, + NoItinerary, DSPRegs, DSPRegs>, + IsCommutable; + +class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">; + +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">; + +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">; + +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">; + +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">; + +// Dot product with accumulate/subtract +class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">; + +class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">; + +class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">; + +class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">; + +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">; + +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">; + +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">; + +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">; + +class MULT_DSP_DESC : MULT_DESC_BASE<"mult">; + +class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">; + +class MADD_DSP_DESC : MULT_DESC_BASE<"madd">; + +class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">; + +class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">; + +class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">; + +// Comparison +class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", + int_mips_cmpu_eq_qb, NoItinerary, + DSPRegs>, IsCommutable; + 
+// Ordered comparisons.  Unlike the .eq forms, the .lt/.le forms are not
+// symmetric in $rs/$rt, so they must NOT be marked IsCommutable: a pass that
+// commutes operands would turn "rs < rt" into "rt < rs".
+class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
+    int_mips_cmpu_lt_qb, NoItinerary, DSPRegs>;
+
+class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
+    int_mips_cmpu_le_qb, NoItinerary, DSPRegs>;
+
+class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
+    int_mips_cmpgu_eq_qb, NoItinerary, CPURegs, DSPRegs>,
+    IsCommutable;
+
+class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
+    int_mips_cmpgu_lt_qb, NoItinerary, CPURegs, DSPRegs>;
+
+class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
+    int_mips_cmpgu_le_qb, NoItinerary, CPURegs, DSPRegs>;
+
+class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
+    NoItinerary, DSPRegs>,
+    IsCommutable;
+
+class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
+    NoItinerary, DSPRegs>;
+
+class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
+    NoItinerary, DSPRegs>;
+
+// Misc
+class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
+    NoItinerary, CPURegs>, ClearDefs;
+
+class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
+    NoItinerary, DSPRegs, DSPRegs>,
+    ClearDefs;
+
+class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+// The register ("v") forms reuse the intrinsic of the immediate forms; only
+// the source operand class differs (CPURegs instead of an immediate).
+class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+    NoItinerary, DSPRegs, CPURegs>,
+    ClearDefs;
+
+class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+    NoItinerary, DSPRegs, CPURegs>,
+    ClearDefs;
+
+class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
+    NoItinerary, DSPRegs, DSPRegs>,
+    ClearDefs, UseDSPCtrl;
+
+class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
+    NoItinerary, DSPRegs, DSPRegs>,
+    ClearDefs, UseDSPCtrl;
+
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+
+class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
+
+// Extr
+// Each immediate form (R1 base) is paired with a register "v" form (R2 base)
+// that is given the same selection node.
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
+    NoItinerary>;
+
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
+    NoItinerary>;
+
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
+    NoItinerary>;
+
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
+    NoItinerary>;
+
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
+    NoItinerary>;
+
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
+    NoItinerary>;
+
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
+    NoItinerary>;
+
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
+    NoItinerary>;
+
+class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">;
+
+class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">;
+
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">;
+
+class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
+
+class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
+
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+
+//===----------------------------------------------------------------------===//
+// MIPS DSP Rev 2
+//===----------------------------------------------------------------------===//
+
+// Addition/subtraction
+class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
+    DSPRegs, DSPRegs>, IsCommutable;
+
+class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
+    NoItinerary, DSPRegs, DSPRegs>,
+    IsCommutable;
+
+class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
+    DSPRegs, DSPRegs>;
+
+class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
+    NoItinerary, DSPRegs, DSPRegs>;
+
+class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
+    NoItinerary, DSPRegs>,
+    ClearDefs, IsCommutable;
+
+class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
+    NoItinerary, DSPRegs>,
+    ClearDefs, IsCommutable;
+
+class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
+    NoItinerary, DSPRegs>,
+    ClearDefs, IsCommutable;
+
+class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
+    NoItinerary, DSPRegs>,
+    ClearDefs, IsCommutable;
+
+class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
+    NoItinerary, CPURegs>,
+    ClearDefs, IsCommutable;
+
+class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
+    NoItinerary, CPURegs>,
+    ClearDefs, IsCommutable;
+
+class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
+    NoItinerary, CPURegs>, ClearDefs;
+
+class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
+    NoItinerary, CPURegs>, ClearDefs;
+
+// Comparison
+// As with the Rev 1 comparisons, only the .eq form may be commuted.
+class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
+    int_mips_cmpgdu_eq_qb, NoItinerary, CPURegs, DSPRegs>,
+    IsCommutable;
+
+class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
+    int_mips_cmpgdu_lt_qb, NoItinerary, CPURegs, DSPRegs>;
+
+class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
+    int_mips_cmpgdu_le_qb, NoItinerary, CPURegs, DSPRegs>;
+
+// Absolute
+class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
+    NoItinerary, DSPRegs>;
+
+// Multiplication
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
+    DSPRegs>, IsCommutable;
+
+class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
+    NoItinerary, DSPRegs>, IsCommutable;
+
+class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
+    NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
+    NoItinerary, CPURegs>, IsCommutable;
+
+class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
+    NoItinerary, DSPRegs, DSPRegs>,
+    IsCommutable;
+
+// Dot product with accumulate/subtract
+class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">;
+
+class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">;
+
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">;
+
+class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">;
+
+class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">;
+
+class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">;
+
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">;
+
+class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">;
+
+class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">;
+
+// Precision reduce/expand
+class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
+    int_mips_precr_qb_ph, NoItinerary, DSPRegs, DSPRegs>;
+
+class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
+    int_mips_precr_sra_ph_w, NoItinerary, DSPRegs, CPURegs>,
+    ClearDefs;
+
+class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
+    int_mips_precr_sra_r_ph_w, NoItinerary, DSPRegs, CPURegs>,
+    ClearDefs;
+
+// Shift
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
+    immZExt3, NoItinerary, DSPRegs>,
+    ClearDefs;
+
+class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
+    NoItinerary, DSPRegs>, ClearDefs;
+
+// Misc
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+    NoItinerary>, ClearDefs;
+
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+    NoItinerary>, ClearDefs;
+
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
+    NoItinerary>, ClearDefs;
+
+// Pseudos.
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+
+// Instruction defs.
+// MIPS DSP Rev 1 +def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC; +def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC; +def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC; +def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC; +def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC; +def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC; +def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC; +def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC; +def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC; +def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC; +def ADDSC : ADDSC_ENC, ADDSC_DESC; +def ADDWC : ADDWC_ENC, ADDWC_DESC; +def MODSUB : MODSUB_ENC, MODSUB_DESC; +def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC; +def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC; +def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC; +def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC; +def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC; +def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC; +def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC; +def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC; +def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC; +def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC; +def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC; +def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC; +def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC; +def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC; +def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC; +def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC; +def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC; +def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC; +def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC; +def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC; +def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC; +def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC; +def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC; +def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC; +def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC; +def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC; +def SHRAV_PH : 
SHRAV_PH_ENC, SHRAV_PH_DESC; +def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC; +def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC; +def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC; +def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC; +def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC; +def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC; +def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC; +def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC; +def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC; +def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC; +def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC; +def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC; +def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; +def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; +def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; +def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; +def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; +def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; +def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; +def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC; +def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC; +def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC; +def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC; +def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC; +def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC; +def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC; +def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC; +def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC; +def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC; +def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC; +def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC; +def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC; +def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC; +def CMPGU_EQ_QB : CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC; +def CMPGU_LT_QB : CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC; +def CMPGU_LE_QB : CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC; +def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC; +def CMP_LT_PH : CMP_LT_PH_ENC, 
CMP_LT_PH_DESC; +def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC; +def BITREV : BITREV_ENC, BITREV_DESC; +def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC; +def REPL_QB : REPL_QB_ENC, REPL_QB_DESC; +def REPL_PH : REPL_PH_ENC, REPL_PH_DESC; +def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC; +def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC; +def PICK_QB : PICK_QB_ENC, PICK_QB_DESC; +def PICK_PH : PICK_PH_ENC, PICK_PH_DESC; +def LWX : LWX_ENC, LWX_DESC; +def LHX : LHX_ENC, LHX_DESC; +def LBUX : LBUX_ENC, LBUX_DESC; +def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC; +def INSV : INSV_ENC, INSV_DESC; +def EXTP : EXTP_ENC, EXTP_DESC; +def EXTPV : EXTPV_ENC, EXTPV_DESC; +def EXTPDP : EXTPDP_ENC, EXTPDP_DESC; +def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC; +def EXTR_W : EXTR_W_ENC, EXTR_W_DESC; +def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC; +def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC; +def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC; +def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC; +def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC; +def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC; +def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC; +def SHILO : SHILO_ENC, SHILO_DESC; +def SHILOV : SHILOV_ENC, SHILOV_DESC; +def MTHLIP : MTHLIP_ENC, MTHLIP_DESC; +def RDDSP : RDDSP_ENC, RDDSP_DESC; +def WRDSP : WRDSP_ENC, WRDSP_DESC; + +// MIPS DSP Rev 2 +let Predicates = [HasDSPR2] in { + +def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC; +def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC; +def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC; +def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC; +def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC; +def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC; +def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC; +def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC; +def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC; +def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC; +def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC; +def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC; +def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC; +def ADDQH_R_PH : 
ADDQH_R_PH_ENC, ADDQH_R_PH_DESC; +def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC; +def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC; +def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC; +def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC; +def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC; +def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC; +def MUL_PH : MUL_PH_ENC, MUL_PH_DESC; +def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC; +def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC; +def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC; +def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC; +def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC; +def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC; +def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC; +def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC; +def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC; +def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC; +def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC; +def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC; +def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC; +def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC; +def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC; +def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC; +def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC; +def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC; +def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC; +def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC; +def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC; +def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC; +def APPEND : APPEND_ENC, APPEND_DESC; +def BALIGN : BALIGN_ENC, BALIGN_DESC; +def PREPEND : PREPEND_ENC, PREPEND_DESC; + +} + +// Pseudos. 
+def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary, + MULSAQ_S_W_PH>; +def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary, + MAQ_S_W_PHL>; +def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary, + MAQ_S_W_PHR>; +def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary, + MAQ_SA_W_PHL>; +def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary, + MAQ_SA_W_PHR>; +def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary, + DPAU_H_QBL>; +def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary, + DPAU_H_QBR>; +def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary, + DPSU_H_QBL>; +def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary, + DPSU_H_QBR>; +def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary, + DPAQ_S_W_PH>; +def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary, + DPSQ_S_W_PH>; +def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary, + DPAQ_SA_L_W>; +def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary, + DPSQ_SA_L_W>; + +def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>, + IsCommutable; +def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>, + IsCommutable; +def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>, + IsCommutable, UseAC; +def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>, + IsCommutable, UseAC; +def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>, + UseAC; +def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>, + UseAC; + +def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>; +def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>; +def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, 
MTHLIP>; + +let Predicates = [HasDSPR2] in { + +def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>; +def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>; +def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary, + DPAQX_S_W_PH>; +def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary, + DPAQX_SA_W_PH>; +def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary, + DPAX_W_PH>; +def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary, + DPSX_W_PH>; +def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary, + DPSQX_S_W_PH>; +def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary, + DPSQX_SA_W_PH>; +def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary, + MULSA_W_PH>; + +} + +// Patterns. +class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : + Pat<pattern, result>, Requires<[pred]>; + +class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC, + RegisterClass SrcRC> : + DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))), + (COPY_TO_REGCLASS SrcRC:$src, DstRC)>; + +def : BitconvertPat<i32, v2i16, CPURegs, DSPRegs>; +def : BitconvertPat<i32, v4i8, CPURegs, DSPRegs>; +def : BitconvertPat<v2i16, i32, DSPRegs, CPURegs>; +def : BitconvertPat<v4i8, i32, DSPRegs, CPURegs>; + +def : DSPPat<(v2i16 (load addr:$a)), + (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; +def : DSPPat<(v4i8 (load addr:$a)), + (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>; +def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; +def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), + (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>; + +// Extr patterns. 
+class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : + DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>; + +class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> : + DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>; + +def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>; +def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>; +def : EXTR_W_TY1_R1_Pat<MipsEXTPDP, EXTPDP>; +def : EXTR_W_TY1_R2_Pat<MipsEXTPDP, EXTPDPV>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_W, EXTR_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_W, EXTRV_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_R_W, EXTR_R_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_R_W, EXTRV_R_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>; +def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>; +def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b1220d6250..e9f330ffc1 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -875,6 +875,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); // @LOCALMOD-BEGIN case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); @@ -988,6 +990,70 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, return BB; } */ + +MachineBasicBlock * +MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = 
BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return Sink; +} + MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1096,6 +1162,8 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return EmitAtomicCmpSwap(MI, BB, 8); + case Mips::BPOSGE32_PSEUDO: + return EmitBPOSGE32(MI, BB); } } @@ -2340,6 +2408,151 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } +// This function expands mips intrinsic nodes which have 64-bit input operands +// or output values. +// +// out64 = intrinsic-node in64 +// => +// lo = copy (extract-element (in64, 0)) +// hi = copy (extract-element (in64, 1)) +// mips-specific-node +// v0 = copy lo +// v1 = copy hi +// out64 = merge-values (v0, v1) +// +static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, bool HasI64In, bool HasI64Out) { + DebugLoc DL = Op.getDebugLoc(); + bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; + SDValue Chain = HasChainIn ? 
Op->getOperand(0) : DAG.getEntryNode(); + SmallVector<SDValue, 3> Ops; + + if (HasI64In) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, + Op->getOperand(1 + HasChainIn), + DAG.getConstant(1, MVT::i32)); + + Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue()); + Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1)); + + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end()); + Ops.push_back(Chain.getValue(1)); + } else { + Ops.push_back(Chain); + Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end()); + } + + if (!HasI64Out) + return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(), + Ops.begin(), Ops.size()); + + SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Ops.begin(), Ops.size()); + SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32, + Intr.getValue(1)); + SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32, + OutLo.getValue(2)); + SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi); + + if (!HasChainIn) + return Out; + + SDValue Vals[] = { Out, OutHi.getValue(1) }; + return DAG.getMergeValues(Vals, 2, DL); +} + +SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_shilo: + return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + case Intrinsic::mips_dpau_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + case Intrinsic::mips_dpau_h_qbr: + return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + case Intrinsic::mips_dpsu_h_qbl: + return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + case Intrinsic::mips_dpsu_h_qbr: + return LowerDSPIntr(Op, DAG, 
MipsISD::DPSU_H_QBR, true, true); + case Intrinsic::mips_dpa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + case Intrinsic::mips_dps_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + case Intrinsic::mips_dpax_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + case Intrinsic::mips_dpsx_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + case Intrinsic::mips_mulsa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + case Intrinsic::mips_mult: + return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + case Intrinsic::mips_multu: + return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + case Intrinsic::mips_madd: + return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + case Intrinsic::mips_maddu: + return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + case Intrinsic::mips_msub: + return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + case Intrinsic::mips_msubu: + return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + } +} + +SDValue MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { + default: + return SDValue(); + case Intrinsic::mips_extp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + case Intrinsic::mips_extpdp: + return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + case Intrinsic::mips_extr_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + case Intrinsic::mips_extr_r_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + case Intrinsic::mips_extr_rs_w: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + case Intrinsic::mips_extr_s_h: + return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + case Intrinsic::mips_mthlip: + return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + case Intrinsic::mips_mulsaq_s_w_ph: + return LowerDSPIntr(Op, 
DAG, MipsISD::MULSAQ_S_W_PH, true, true); + case Intrinsic::mips_maq_s_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + case Intrinsic::mips_maq_s_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + case Intrinsic::mips_maq_sa_w_phl: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + case Intrinsic::mips_maq_sa_w_phr: + return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + case Intrinsic::mips_dpaq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + case Intrinsic::mips_dpsq_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + case Intrinsic::mips_dpaq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + case Intrinsic::mips_dpsq_sa_l_w: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + case Intrinsic::mips_dpaqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + case Intrinsic::mips_dpaqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + case Intrinsic::mips_dpsqx_s_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + case Intrinsic::mips_dpsqx_sa_w_ph: + return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + } +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4e9398430b..2dce449765 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -202,6 +202,8 @@ namespace llvm { bool IsSRA) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) 
const; // @LOCALMOD-BEGIN SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; @@ -265,6 +267,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 3f98ae857b..6fa94a96e5 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1266,3 +1266,8 @@ include "MipsCondMov.td" include "Mips16InstrFormats.td" include "Mips16InstrInfo.td" + +// DSP +include "MipsDSPInstrFormats.td" +include "MipsDSPInstrInfo.td" + diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 3eab5a452e..c4a6016105 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -33,11 +33,11 @@ public: MipsMCInstLower(MipsAsmPrinter &asmprinter); void Initialize(Mangler *mang, MCContext *C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; - MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; }; } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index df3c4c0de0..93ce94803a 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -45,9 +45,7 @@ class MipsFunctionInfo : public MachineFunctionInfo { // Range of frame object indices. // InArgFIRange: Range of indices of all frame objects created during call to // LowerFormalArguments. 
- // OutArgFIRange: Range of indices of all frame objects created during call to - // LowerCall except for the frame object for restoring $gp. - std::pair<int, int> InArgFIRange, OutArgFIRange; + std::pair<int, int> InArgFIRange; unsigned MaxCallFrameSize; bool EmitNOAT; @@ -56,7 +54,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) + MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -64,16 +62,6 @@ public: } void setLastInArgFI(int FI) { InArgFIRange.second = FI; } - bool isOutArgFI(int FI) const { - return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second; - } - void extendOutArgFIRange(int FirstFI, int LastFI) { - if (!OutArgFIRange.second) - // this must be the first time this function was called. - OutArgFIRange.first = FirstFI; - OutArgFIRange.second = LastFI; - } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index ae4813e128..a72e3b857f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -14,6 +14,8 @@ let Namespace = "Mips" in { def sub_fpeven : SubRegIndex; def sub_fpodd : SubRegIndex; def sub_32 : SubRegIndex; +def sub_lo : SubRegIndex; +def sub_hi : SubRegIndex; } // We have banks of 32 registers each. 
@@ -247,33 +249,11 @@ let Namespace = "Mips" in { def HWR29_64 : Register<"29">; // Accum registers - def LO0 : Register<"ac0"> { - let Aliases = [LO]; - } - def HI0 : Register<"hi0"> { - let Aliases = [HI]; - } - def LO1 : Register<"ac1">; - def HI1 : Register<"hi1">; - def LO2 : Register<"ac2">; - def HI2 : Register<"hi2">; - def LO3 : Register<"ac3">; - def HI3 : Register<"hi3">; - - let SubRegIndices = [sub_32] in { - def LO0_64 : RegisterWithSubRegs<"ac0", [LO0]> { - let Aliases = [LO64]; - } - def HI0_64 : RegisterWithSubRegs<"hi0", [HI0]> { - let Aliases = [HI64]; - } - def LO1_64 : RegisterWithSubRegs<"ac1", [LO1]>; - def HI1_64 : RegisterWithSubRegs<"hi1", [HI1]>; - def LO2_64 : RegisterWithSubRegs<"ac2", [LO2]>; - def HI2_64 : RegisterWithSubRegs<"hi2", [HI2]>; - def LO3_64 : RegisterWithSubRegs<"ac3", [LO3]>; - def HI3_64 : RegisterWithSubRegs<"hi3", [HI3]>; - } + let SubRegIndices = [sub_lo, sub_hi] in + def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>; + def AC1 : Register<"ac1">; + def AC2 : Register<"ac2">; + def AC3 : Register<"ac3">; def DSPCtrl : Register<"dspctrl">; } @@ -322,6 +302,7 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -357,9 +338,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; -// Accum Registers -def HIRegs : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>; -def LORegs : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>; - -def HI64Regs : RegisterClass<"Mips", [i64], 64, (sequence "HI%u_64", 0, 3)>; -def LO64Regs : RegisterClass<"Mips", [i64], 64, (sequence "LO%u_64", 0, 3)>; +// Accumulator Registers +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; diff --git 
a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index d868f73758..8e2c2c5174 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -91,8 +91,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // getFrameRegister() returns. unsigned FrameReg; - if (MipsFI->isOutArgFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -106,12 +105,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // incoming argument, callee-saved register location or local variable. int64_t Offset; - if (MipsFI->isOutArgFI(FrameIndex)) - Offset = SPOffset; - else - Offset = SPOffset + (int64_t)StackSize; - - Offset += MI.getOperand(OpNo + 1).getImm(); + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 7f5927d8ed..1ff41ca358 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -31,7 +31,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), - HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false) + HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false), + HasDSP(false), HasDSPR2(false), IsAndroid(false) // @LOCALMOD-START , TargetTriple(TT) // @LOCALMOD-END diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 97d3600b1b..d8851a04eb 100644 --- 
a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -193,7 +193,7 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. - bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + bool DisableRedZone = MF.getFunction()->getFnAttributes().hasNoRedZoneAttr(); // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can // still generate stackless code if all local vars are reg-allocated. // Try: (FrameSize <= 224 @@ -255,7 +255,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (MF.getFunction()->getFnAttributes().hasNakedAttr()) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2e8fa1842a..27f26cd5fd 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2048,7 +2048,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; @@ -2103,7 +2104,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(), + MachinePointerInfo(FuncArg, + CurArgOffset), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -2136,7 +2138,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( } SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN, - MachinePointerInfo(), + MachinePointerInfo(FuncArg, ArgOffset), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -6000,7 +6002,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->hasFnAttr(Attribute::Naked); + !MF.getFunction()->getFnAttributes().hasNakedAttr(); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 285e74a4c2..1665d7313c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -596,7 +596,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + if (!MF.getFunction()->getFnAttributes().hasNakedAttr()) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. 
If this is a diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index cc6dc1e259..0040147022 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -314,6 +314,8 @@ void TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); + assert(pref_align < (1 << 16) && "Alignment doesn't fit in bitfield"); + assert(bit_width < (1 << 24) && "Bit width doesn't fit in bitfield"); for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { if (Alignments[i].AlignType == align_type && Alignments[i].TypeBitWidth == bit_width) { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 77961e53ae..9263bdde20 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -60,10 +60,6 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool mnemonicIsValid(StringRef Mnemonic) { - return mnemonicIsValidImpl(Mnemonic); - } - bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); @@ -77,13 +73,6 @@ private: unsigned &OrigErrorInfo, bool matchingInlineAsm = false); - unsigned getMCInstOperandNum(unsigned Kind, MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - unsigned OperandNum, unsigned &NumMCOperands) { - return getMCInstOperandNumImpl(Kind, Inst, Operands, OperandNum, - NumMCOperands); - } - /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 
bool isSrcOp(X86Operand &Op); @@ -1636,16 +1625,20 @@ MatchInstruction(SMLoc IDLoc, unsigned &Kind, unsigned Match1, Match2, Match3, Match4; unsigned tKind; - Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match1 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match1 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[1]; - Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match2 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match2 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[2]; - Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match3 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match3 == Match_Success) Kind = tKind; Tmp[Base.size()] = Suffixes[3]; - Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore); + Match4 = MatchInstructionImpl(Operands, tKind, Inst, ErrorInfoIgnore, + isParsingIntelSyntax()); if (Match4 == Match_Success) Kind = tKind; // Restore the old token. diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 46e72f9f60..b123afa001 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" @@ -38,6 +39,12 @@ void X86ATTInstPrinter::printRegName(raw_ostream &OS, void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + // Try to print any aliases first. 
if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index ad14e34707..f9bb3be9d7 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCExpr.h" @@ -32,6 +33,12 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::LOCK) + OS << "\tlock\n"; + printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 3c0e3e6f2d..7706b9308e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -34,6 +34,10 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), clEnumValN(Intel, "intel", "Emit Intel-style assembly"), clEnumValEnd)); +static cl::opt<bool> +MarkedJTDataRegions("mark-data-regions", cl::init(false), + cl::desc("Mark code section jump table data regions."), + cl::Hidden); void X86MCAsmInfoDarwin::anchor() { } @@ -59,6 +63,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { SupportsDebugInformation = true; DwarfUsesInlineInfoSection = true; + UseDataRegionDirectives = MarkedJTDataRegions; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index f0f1982d57..7ff058edbc 100644 --- 
a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -11,11 +11,13 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Object/MachOFormat.h" using namespace llvm; @@ -23,7 +25,7 @@ using namespace llvm::object; namespace { class X86MachObjectWriter : public MCMachObjectTargetWriter { - void RecordScatteredRelocation(MachObjectWriter *Writer, + bool RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -335,7 +337,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer, Writer->addRelocation(Fragment->getParent(), MRE); } -void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, +bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -381,6 +383,19 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // Relocations are written out in reverse order, so the PAIR comes first. if (Type == macho::RIT_Difference || Type == macho::RIT_Generic_LocalDifference) { + // If the offset is too large to fit in a scattered relocation, + // we're hosed. It's an unfortunate limitation of the MachO format. 
+ if (FixupOffset > 0xffffff) { + char Buffer[32]; + format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); + Asm.getContext().FatalError(Fixup.getLoc(), + Twine("Section too large, can't encode " + "r_address (") + Buffer + + ") into 24 bits of scattered " + "relocation entry."); + llvm_unreachable("fatal error returned?!"); + } + macho::RelocationEntry MRE; MRE.Word0 = ((0 << 0) | (macho::RIT_Pair << 24) | @@ -389,6 +404,16 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value2; Writer->addRelocation(Fragment->getParent(), MRE); + } else { + // If the offset is more than 24-bits, it won't fit in a scattered + // relocation offset field, so we fall back to using a non-scattered + // relocation. This is a bit risky, as if the offset reaches out of + // the block and the linker is doing scattered loading on this + // symbol, things can go badly. + // + // Required for 'as' compatibility. + if (FixupOffset > 0xffffff) + return false; } macho::RelocationEntry MRE; @@ -399,6 +424,7 @@ void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, macho::RF_Scattered); MRE.Word1 = Value; Writer->addRelocation(Fragment->getParent(), MRE); + return true; } void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, @@ -469,9 +495,11 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered // relocations. - if (Target.getSymB()) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + if (Target.getSymB()) { + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + return; + } // Get the symbol data, if any. 
MCSymbolData *SD = 0; @@ -483,9 +511,13 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) - return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + // Try to record the scattered relocation if needed. Fall back to non + // scattered if necessary (see comments in RecordScatteredRelocation() + // for details). + if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) && + RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue)) + return; // See <reloc.h>. uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0d8def0e47..85922f1277 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -676,7 +676,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + if (Is64Bit && !Fn->getFnAttributes().hasNoRedZoneAttr() && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index b409e88148..767e261a82 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -443,7 +443,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. 
- OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + OptForSize = MF->getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { @@ -2253,6 +2253,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: case X86ISD::ATOMNAND64_DAG: case X86ISD::ATOMAND64_DAG: + case X86ISD::ATOMMAX64_DAG: + case X86ISD::ATOMMIN64_DAG: + case X86ISD::ATOMUMAX64_DAG: + case X86ISD::ATOMUMIN64_DAG: case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { @@ -2263,6 +2267,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; + case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; + case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; + case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; } SDNode *RetVal = SelectAtomic64(Node, Opc); @@ -2389,13 +2397,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); bool isSigned = Opcode == ISD::SMUL_LOHI; + bool hasBMI2 = Subtarget->hasBMI2(); if (!isSigned) { switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; + case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r; + MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break; + case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r; + MOpc = hasBMI2 ? 
X86::MULX64rm : X86::MUL64m; break; } } else { switch (NVT.getSimpleVT().SimpleTy) { @@ -2407,13 +2418,31 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + unsigned SrcReg, LoReg, HiReg; + switch (Opc) { + default: llvm_unreachable("Unknown MUL opcode!"); + case X86::IMUL8r: + case X86::MUL8r: + SrcReg = LoReg = X86::AL; HiReg = X86::AH; + break; + case X86::IMUL16r: + case X86::MUL16r: + SrcReg = LoReg = X86::AX; HiReg = X86::DX; + break; + case X86::IMUL32r: + case X86::MUL32r: + SrcReg = LoReg = X86::EAX; HiReg = X86::EDX; + break; + case X86::IMUL64r: + case X86::MUL64r: + SrcReg = LoReg = X86::RAX; HiReg = X86::RDX; + break; + case X86::MULX32rr: + SrcReg = X86::EDX; LoReg = HiReg = 0; + break; + case X86::MULX64rr: + SrcReg = X86::RDX; LoReg = HiReg = 0; + break; } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; @@ -2425,22 +2454,47 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { std::swap(N0, N1); } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg, N0, SDValue()).getValue(1); + SDValue ResHi, ResLo; if (foldedLoad) { + SDValue Chain; SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; - SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); + if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + Chain = 
SDValue(CNode, 2); + InFlag = SDValue(CNode, 3); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, + array_lengthof(Ops)); + Chain = SDValue(CNode, 0); + InFlag = SDValue(CNode, 1); + } // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + ReplaceUses(N1.getValue(1), Chain); } else { - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); - InFlag = SDValue(CNode, 0); + SDValue Ops[] = { N1, InFlag }; + if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + ResHi = SDValue(CNode, 0); + ResLo = SDValue(CNode, 1); + InFlag = SDValue(CNode, 2); + } else { + SDVTList VTs = CurDAG->getVTList(MVT::Glue); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 0); + } } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -2465,19 +2519,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 0), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResLo.getNode() == 0) { + assert(LoReg && "Register for low half is not defined!"); + ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, + InFlag); + InFlag = ResLo.getValue(2); + } + ReplaceUses(SDValue(Node, 0), ResLo); + DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. 
if (!SDValue(Node, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(SDValue(Node, 1), Result); - DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); + if (ResHi.getNode() == 0) { + assert(HiReg && "Register for high half is not defined!"); + ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, + InFlag); + InFlag = ResHi.getValue(2); + } + ReplaceUses(SDValue(Node, 1), ResHi); + DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } return NULL; @@ -2678,7 +2738,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i8, Reg); // Emit a testb. - return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2709,8 +2775,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only // target GR8_NOREX registers, so make sure the register class is // forced. - return CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, MVT::i32, - Subreg, ShiftedImm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, + MVT::i32, Subreg, ShiftedImm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2726,7 +2797,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i16, Reg); // Emit a testw. 
- return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2742,7 +2819,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MVT::i32, Reg); // Emit a testl. - return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); + SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, + Subreg, Imm); + // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has + // one, do not call ReplaceAllUsesWith. + ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), + SDValue(NewNode, 0)); + return NULL; } } break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bdfe245027..ffaf04cea7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -522,6 +522,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); } if (Subtarget->hasCmpxchg16b()) { @@ -1357,7 +1361,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // cases like PR2962. This should be removed when PR2962 is fixed. 
const Function *F = MF.getFunction(); if (IsZeroVal && - !F->hasFnAttr(Attribute::NoImplicitFloat)) { + !F->getFnAttributes().hasNoImplicitFloatAttr()) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -2048,7 +2052,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getFnAttributes().hasNoImplicitFloatAttr(); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2240,7 +2244,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, - MF.getFunction()->hasStructRetAttr(), + MF.getFunction()->hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require @@ -2524,7 +2528,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) && - cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) { + cast<Function>(GV)->getFnAttributes().hasNonLazyBindAttr()) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). 
@@ -2761,6 +2765,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2772,6 +2777,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + + // If the function return type is x86_fp80 and the callee return type is not, + // then the FP_EXTEND of the call result is not a nop. It's not safe to + // perform a tailcall optimization here. + if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) + return false; + CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; @@ -6661,7 +6673,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + bool OptForSize = MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr(); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -9783,7 +9795,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. 
assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + .getFunction()->getFnAttributes().hasNoImplicitFloatAttr()) && Subtarget->hasSSE1()); } @@ -11769,6 +11781,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_SWAP: { unsigned Opc; switch (N->getOpcode()) { @@ -11791,6 +11807,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_LOAD_XOR: Opc = X86ISD::ATOMXOR64_DAG; break; + case ISD::ATOMIC_LOAD_MAX: + Opc = X86ISD::ATOMMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_MIN: + Opc = X86ISD::ATOMMIN64_DAG; + break; + case ISD::ATOMIC_LOAD_UMAX: + Opc = X86ISD::ATOMUMAX64_DAG; + break; + case ISD::ATOMIC_LOAD_UMIN: + Opc = X86ISD::ATOMUMIN64_DAG; + break; case ISD::ATOMIC_SWAP: Opc = X86ISD::ATOMSWAP64_DAG; break; @@ -12182,6 +12210,10 @@ static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; + case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; + case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; + case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; + case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; } llvm_unreachable("Unhandled atomic-load-op opcode!"); } @@ -12499,6 +12531,7 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, SrcHiReg = MI->getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = &X86::GR32RegClass; + const TargetRegisterClass *RC8 = &X86::GR8RegClass; unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; @@ -12586,6 +12619,55 @@ 
X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); break; } + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: { + unsigned HiOpc; + unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); + unsigned cL = MRI.createVirtualRegister(RC8); + unsigned cH = MRI.createVirtualRegister(RC8); + unsigned cL32 = MRI.createVirtualRegister(RC); + unsigned cH32 = MRI.createVirtualRegister(RC); + unsigned cc = MRI.createVirtualRegister(RC); + // cl := cmp src_lo, lo + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), cL); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); + // ch := cmp src_hi, hi + BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) + .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), cH); + BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); + // cc := if (src_hi == hi) ? 
cl : ch; + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) + .addReg(cH32).addReg(cL32); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) + .addReg(cH32).addReg(cL32) + .addImm(X86::COND_E); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); + if (Subtarget->hasCMov()) { + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) + .addReg(SrcLoReg).addReg(LoReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) + .addReg(SrcHiReg).addReg(HiReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) + .addReg(SrcLoReg).addReg(LoReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) + .addReg(SrcHiReg).addReg(HiReg) + .addImm(X86::COND_NE); + mainMBB = EmitLoweredSelect(MIB, mainMBB); + } + break; + } case X86::ATOMSWAP6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); @@ -13576,6 +13658,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMNAND6432: case X86::ATOMADD6432: case X86::ATOMSUB6432: + case X86::ATOMMAX6432: + case X86::ATOMMIN6432: + case X86::ATOMUMAX6432: + case X86::ATOMUMIN6432: case X86::ATOMSWAP6432: return EmitAtomicLoadArith6432(MI, BB); @@ -15562,7 +15648,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + bool NoImplicitFloatOps = F->getFnAttributes().hasNoImplicitFloatAttr(); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d3545b0e9f..a53909b7a0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -355,6 
+355,10 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, + ATOMMAX64_DAG, + ATOMMIN64_DAG, + ATOMUMAX64_DAG, + ATOMUMIN64_DAG, ATOMSWAP64_DAG, // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. @@ -752,6 +756,7 @@ namespace llvm { bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, + Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 1296bcbe89..3a1ac11f9c 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -561,7 +561,6 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; // TODO: Get this to fold the constant into the instruction. let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; @@ -581,72 +580,72 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, OpSize, LOCK; def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def 
#NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", + !strconcat(mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", + !strconcat(mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, OpSize, LOCK; def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + !strconcat(mnemonic, "{l}\t", "{$src2, 
$dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; @@ -666,16 +665,16 @@ multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), - !strconcat("lock\n\t", mnemonic, "{b}\t$dst"), + !strconcat(mnemonic, "{b}\t$dst"), [], IIC_UNARY_MEM>, LOCK; def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst), - !strconcat("lock\n\t", mnemonic, "{w}\t$dst"), + !strconcat(mnemonic, "{w}\t$dst"), [], IIC_UNARY_MEM>, OpSize, LOCK; def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst), - !strconcat("lock\n\t", mnemonic, "{l}\t$dst"), + !strconcat(mnemonic, "{l}\t$dst"), [], IIC_UNARY_MEM>, LOCK; def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst), - !strconcat("lock\n\t", mnemonic, "{q}\t$dst"), + !strconcat(mnemonic, "{q}\t$dst"), [], IIC_UNARY_MEM>, LOCK; } } @@ -689,7 +688,7 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic, InstrItinClass itin> { let isCodeGenOnly = 1 in { def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr), - !strconcat("lock\n\t", mnemonic, "\t$ptr"), + !strconcat(mnemonic, "\t$ptr"), [(frag addr:$ptr)], itin>, TB, LOCK; } } @@ -700,23 +699,19 @@ multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, let isCodeGenOnly = 1 in { let Defs = [AL, EFLAGS], Uses = [AL] in def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), - !strconcat("lock\n\t", mnemonic, - "{b}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, 
GR16:$swap), - !strconcat("lock\n\t", mnemonic, - "{w}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap), - !strconcat("lock\n\t", mnemonic, - "{l}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap), - !strconcat("lock\n\t", mnemonic, - "{q}\t{$swap, $ptr|$ptr, $swap}"), + !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"), [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK; } } @@ -744,31 +739,27 @@ multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{b}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), [(set GR8:$dst, (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], itin8>; def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{w}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), [(set GR16:$dst, (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], itin>, OpSize; def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{l}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), [(set GR32:$dst, (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], itin>; def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val, i64mem:$ptr), - !strconcat("lock\n\t", mnemonic, - "{q}\t{$val, $ptr|$ptr, $val}"), + !strconcat(mnemonic, 
"{q}\t{$val, $ptr|$ptr, $val}"), [(set GR64:$dst, (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e595876dcf..af570adb79 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -566,6 +566,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + + // BMI/BMI2 foldable instructions + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, + { X86::SARX32rr, X86::SARX32rm, 0 }, + { X86::SARX64rr, X86::SARX64rm, 0 }, + { X86::SHRX32rr, X86::SHRX32rm, 0 }, + { X86::SHRX64rr, X86::SHRX64rm, 0 }, + { X86::SHLX32rr, X86::SHLX32rm, 0 }, + { X86::SHLX64rr, X86::SHLX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1145,6 +1155,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_16 }, { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_32 }, { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, + + // BMI/BMI2 foldable instructions + { X86::MULX32rr, X86::MULX32rm, 0 }, + { X86::MULX64rr, X86::MULX64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -3812,7 +3826,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3853,7 +3867,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if 
(!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + if (!MF.getFunction()->getFnAttributes().hasOptimizeForSizeAttr() && hasPartialRegUpdate(MI->getOpcode())) return 0; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4fce5acc23..5074724fb8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -568,17 +568,17 @@ def HasMMX : Predicate<"Subtarget->hasMMX()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; -def UseSSE1 : Predicate<"Subtarget->hasSSE1() && Subtarget->hasNoAVX()">; +def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; -def UseSSE2 : Predicate<"Subtarget->hasSSE2() && Subtarget->hasNoAVX()">; +def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; -def UseSSE3 : Predicate<"Subtarget->hasSSE3() && Subtarget->hasNoAVX()">; +def UseSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; -def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && Subtarget->hasNoAVX()">; +def UseSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; -def UseSSE41 : Predicate<"Subtarget->hasSSE41() && Subtarget->hasNoAVX()">; +def UseSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; -def UseSSE42 : Predicate<"Subtarget->hasSSE42() && Subtarget->hasNoAVX()">; +def UseSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 
bdeb63ffbd..893488c159 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -839,6 +839,16 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] +def ROT32L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 32-bit integer. + return getI8Imm(32 - N->getZExtValue()); +}]>; + +def ROT64L2R_imm8 : SDNodeXForm<imm, [{ + // Convert a ROTL shamt to a ROTR shamt on 64-bit integer. + return getI8Imm(64 - N->getZExtValue()); +}]>; + multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -873,4 +883,72 @@ let Predicates = [HasBMI2] in { defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + + // Prefer RORX which is non-destructive and doesn't update EFLAGS. + let AddedComplexity = 10 in { + def : Pat<(rotl GR32:$src, (i8 imm:$shamt)), + (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl GR64:$src, (i8 imm:$shamt)), + (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>; + } + + def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)), + (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)), + (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>; + + // Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not + // immediate shift, i.e. the following code is considered better + // + // mov %edi, %esi + // shl $imm, %esi + // ... %edi, ... + // + // than + // + // movb $imm, %sil + // shlx %sil, %edi, %esi + // ... %edi, ...
+ // + let AddedComplexity = 1 in { + def : Pat<(sra GR32:$src1, GR8:$src2), + (SARX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(sra GR64:$src1, GR8:$src2), + (SARX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(srl GR32:$src1, GR8:$src2), + (SHRX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(srl GR64:$src1, GR8:$src2), + (SHRX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + + def : Pat<(shl GR32:$src1, GR8:$src2), + (SHLX32rr GR32:$src1, + (INSERT_SUBREG + (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + def : Pat<(shl GR64:$src1, GR8:$src2), + (SHLX64rr GR64:$src1, + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + } + + // Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor + // + // mov (%ecx), %esi + // shl $imm, %esi + // + // over + // + // movb $imm, %al + // shlx %al, (%ecx), %esi + // + // As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole + // optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible. } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 582f5e99ff..262d32e4e6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -423,7 +423,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const Function *F = MF.getFunction(); unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + F->getFnAttributes().hasStackAlignmentAttr()); // If we've requested that we force align the stack do so now.
if (ForceStackAlign) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 4c7b8fc4de..921ded8f2d 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -205,7 +205,6 @@ public: bool hasSSE42() const { return X86SSELevel >= SSE42; } bool hasAVX() const { return X86SSELevel >= AVX; } bool hasAVX2() const { return X86SSELevel >= AVX2; } - bool hasNoAVX() const { return X86SSELevel < AVX; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index b94dd69deb..10f5b6e658 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -592,14 +592,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *RetTy = FTy->getReturnType(); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg()) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); @@ -711,9 +703,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } - if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); @@ -870,16 +859,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Increment I2 past all of the arguments added for this promoted pointer. - for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) - ++I2; + std::advance(I2, ArgIndices.size()); } - // Notify the alias analysis implementation that we inserted a new argument. 
- if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), - NF->arg_begin()); - - // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index fd23a935b9..c7429c5954 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -717,9 +717,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) - RAttrs &= ~Attribute::typeIncompatible(NRetTy); + RAttrs &= ~Attributes::typeIncompatible(NRetTy); else - assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0 && "Return attributes no longer compatible?"); if (RAttrs) @@ -786,7 +786,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes RAttrs = CallPAL.getRetAttributes(); Attributes FnAttrs = CallPAL.getFnAttributes(); // Adjust in case the function was changed to return void. - RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType()); if (RAttrs) AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b888e95982..b1ba6be5ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -962,7 +962,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we get here we could have other crazy uses that are transitively // loaded. 
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || - isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) && + isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || + isa<BitCastInst>(GlobalUser) || + isa<GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"); } } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 664ddf6f7a..42f0991360 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { /// \brief Minimal filter to detect invalid constructs for inlining. static bool isInlineViable(Function &F) { - bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice); + bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr(); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa<IndirectBrInst>(BI->getTerminator())) @@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { if (Callee->isDeclaration()) return InlineCost::getNever(); // Return never for anything not marked as always inline. - if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + if (!Callee->getFnAttributes().hasAlwaysInlineAttr()) return InlineCost::getNever(); // Do some minimal analysis to preclude non-viable functions. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index a9263baa44..7932b40bdc 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -93,10 +93,10 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. 
- if (Callee->hasFnAttr(Attribute::StackProtectReq)) + if (Callee->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->hasFnAttr(Attribute::StackProtect) && - !Caller->hasFnAttr(Attribute::StackProtectReq)) + else if (Callee->getFnAttributes().hasStackProtectAttr() && + !Caller->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we @@ -209,7 +209,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize); + Caller->getFnAttributes().hasOptimizeForSizeAttr(); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; @@ -217,7 +217,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // Listen to the inlinehint attribute when it would increase the threshold. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint); + Callee->getFnAttributes().hasInlineHintAttr(); if (InlineHint && HintThreshold > thres) thres = HintThreshold; @@ -533,7 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. 
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr()) continue; // If the only remaining users of the function are dead constants, remove diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index c81b333813..9e328b9ac9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -211,13 +211,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - // GlobalOpt already deletes dead functions and globals, at -O3 try a + // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. - if (OptLevel > 2) + if (OptLevel > 1) { MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - - if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants + } } addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23c08699ff..ac30dcdcbf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + if (RAttrs & Attributes::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } @@ -1067,7 +1067,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. 
Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attribute::typeIncompatible(ParamTy)) + if (Attrs & Attributes::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1141,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + RAttrs &= ~Attributes::typeIncompatible(NewRetTy); // Add the new return attributes. if (RAttrs) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5b6cf4a4a8..a446e427e5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -264,26 +264,28 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. 
- SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - EraseInstFromFunction(*ToDelete[i]); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); - EraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + if (TD) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. 
+ SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } } } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 291e80019e..0ba7340e64 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -903,7 +903,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -912,6 +912,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, V); return &SI; } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { + // Form a shufflevector instruction. + SmallVector<Constant *, 8> Mask(VWidth); + Type *Int32Ty = Type::getInt32Ty(CV->getContext()); + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elem = cast<Constant>(CV->getOperand(i)); + if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) + Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); + else if (isa<UndefValue>(Elem)) + Mask[i] = UndefValue::get(Int32Ty); + else + return 0; + } + Constant *MaskVal = ConstantVector::get(Mask); + Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); + return ReplaceInstUsesWith(SI, V); + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } } return 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index afa6a4b5e6..1b102bd243 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -854,7 +854,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + if (!F.getFnAttributes().hasAddressSafetyAttr()) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 495cdc6321..305d70f27b 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.hasFnAttr(Attribute::OptimizeForSize); + OptSize = F.getFnAttributes().hasOptimizeForSizeAttr(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); - if (!SinglePred || SinglePred == BB) continue; + // Don't merge if BB's address is taken. 
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { @@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && pred_begin(BB) == pred_end(BB)) + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 9b0aadb0b5..3ec6f3dcc3 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { // This case never fires - remove it. CI.getCaseSuccessor()->removePredecessor(BB); SI->removeCase(CI); // Does not invalidate the iterator. + + // The condition can be modified by removePredecessor's PHI simplification + // logic. 
+ Cond = SI->getCondition(); + ++NumDeadCases; Changed = true; } else if (State == LazyValueInfo::True) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1ff4329c84..301ee2f663 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SetVector.h" @@ -45,6 +46,7 @@ namespace { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { @@ -55,6 +57,7 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TLI = AA->getTargetLibraryInfo(); bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) @@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some memory? This only returns /// true for things that we can analyze with other helpers below. 
-static bool hasMemoryWrite(Instruction *I) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) { return true; } } + if (CallSite CS = I) { + if (Function *F = CS.getCalledFunction()) { + if (TLI && TLI->has(LibFunc::strcpy) && + F->getName() == TLI->getName(LibFunc::strcpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncpy) && + F->getName() == TLI->getName(LibFunc::strncpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strcat) && + F->getName() == TLI->getName(LibFunc::strcat)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncat) && + F->getName() == TLI->getName(LibFunc::strncat)) { + return true; + } + } + } return false; } @@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// instruction if any. static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && + "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isUnordered(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. - return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. 
- return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. + return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - // Don't remove volatile memory intrinsics. - return !cast<MemIntrinsic>(II)->isVolatile(); + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } } + + if (CallSite CS = I) + return CS.getInstruction()->use_empty(); + + return false; } @@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) { if (isa<StoreInst>(I)) return false; - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } } + + // Don't shorten libcalls calls for now. + + return false; } /// getStoredPointerOperand - Return the pointer that is being written to. 
@@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getDest(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getArgOperand(0); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getArgOperand(0); + } } + + CallSite CS = I; + // All the supported functions so far happen to have dest as their first + // argument. + return CS.getArgument(0); } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { @@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (CallInst *F = isFreeCall(Inst, TLI)) { MadeChange |= HandleFree(F); continue; } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst)) + if (!hasMemoryWrite(Inst, TLI)) continue; MemDepResult InstDep = MD->getDependency(Inst); @@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(SI, *MD, TLI); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. 
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(DepWrite, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(Dependency, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && - !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); @@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. 
- DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { + if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) + if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 09a186f7f9..f8709a537f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. 
unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr()) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 58f7739888..74c8f43ec2 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Check to see if it would be profitable to unswitch current loop. // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr()) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e3182d319c..a8dc0533bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -202,11 +202,11 @@ public: use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); } use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); } use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); } - void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) { - Uses[Idx].insert(UI, U); + void use_push_back(unsigned Idx, const PartitionUse &U) { + Uses[Idx].push_back(U); } - void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) { - Uses[I - begin()].insert(UI, U); + void use_push_back(const_iterator I, const PartitionUse &U) { + Uses[I - begin()].push_back(U); } void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); } void use_erase(const_iterator I, use_iterator UI) { @@ -522,8 +522,10 @@ private: void insertUse(Instruction &I, int64_t Offset, uint64_t Size, bool 
IsSplittable = false) { - // Completely skip uses which don't overlap the allocation. - if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) || + // Completely skip uses which have a zero size or don't overlap the + // allocation. + if (Size == 0 || + (Offset >= 0 && (uint64_t)Offset >= AllocSize) || (Offset < 0 && (uint64_t)-Offset >= Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which starts past the end of the " << AllocSize @@ -660,11 +662,14 @@ private: bool Inserted = false; llvm::tie(PMI, Inserted) = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); - if (!Inserted && Offsets.IsSplittable) { + if (Offsets.IsSplittable && + (!Inserted || II.getRawSource() == II.getRawDest())) { // We've found a memory transfer intrinsic which refers to the alloca as - // both a source and dest. We refuse to split these to simplify splitting - // logic. If possible, SROA will still split them into separate allocas - // and then re-analyze. + // both a source and dest. This is detected either by direct equality of + // the operand values, or when we visit the intrinsic twice due to two + // different chains of values leading to it. We refuse to split these to + // simplify splitting logic. If possible, SROA will still split them into + // separate allocas and then re-analyze. Offsets.IsSplittable = false; P.Partitions[PMI->second].IsSplittable = false; P.Partitions[NewIdx].IsSplittable = false; @@ -697,6 +702,9 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + // If there are no loads or stores, the access is dead. We mark that as + // a size zero access. 
+ Size = 0; do { Instruction *I, *UsedI; llvm::tie(UsedI, I) = Uses.pop_back_val(); @@ -824,9 +832,9 @@ private: } void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { - // If the use extends outside of the allocation, record it as a dead use - // for elimination later. - if ((uint64_t)Offset >= AllocSize || + // If the use has a zero size or extends outside of the allocation, record + // it as a dead use for elimination later. + if (Size == 0 || (uint64_t)Offset >= AllocSize || (Offset < 0 && (uint64_t)-Offset >= Size)) return markAsDead(User); @@ -853,7 +861,7 @@ private: PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset), std::min(I->EndOffset, EndOffset), &User, cast<Instruction>(*U)); - P.Uses[I - P.begin()].push_back(NewUse); + P.use_push_back(I, NewUse); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[std::make_pair(&User, U->get())] = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); @@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI) Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); UB(); - for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I) - std::stable_sort(use_begin(I), use_end(I)); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -1890,7 +1896,8 @@ private: Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, uint64_t Offset) { assert(IntPromotionTy && "Alloca is not an integer we can extract from"); - Value *V = IRB.CreateLoad(&NewAI, getName(".load")); + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; if (RelOffset) @@ -1906,7 +1913,7 @@ private: StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { IntegerType *Ty = cast<IntegerType>(V->getType()); if (Ty == IntPromotionTy) - return IRB.CreateStore(V, 
&NewAI); + return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && "Cannot insert a larger integer!"); @@ -1918,10 +1925,12 @@ private: APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()) .shl(RelOffset*8); - Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")), + Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".oldload")), Mask, getName(".mask")); - return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")), - &NewAI); + return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), + &NewAI, NewAI.getAlignment()); } void deleteIfTriviallyDead(Value *V) { @@ -1943,12 +1952,12 @@ private: Value *Result; if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result - = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")), - getIndex(IRB, BeginOffset), - getName(".extract")); + Result = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + getIndex(IRB, BeginOffset), getName(".extract")); } else { - Result = IRB.CreateLoad(&NewAI, getName(".load")); + Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); } if (Result->getType() != LI.getType()) Result = getValueCast(IRB, Result, LI.getType()); @@ -1983,6 +1992,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); + if (LI.getAlignment()) + LI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -1996,13 +2008,14 @@ private: BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) V = getValueCast(IRB, V, ElementTy); - V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - 
getIndex(IRB, BeginOffset), + LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); } else if (V->getType() != VecTy) { V = getValueCast(IRB, V, VecTy); } - StoreInst *Store = IRB.CreateStore(V, &NewAI); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); (void)Store; @@ -2033,6 +2046,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); + if (SI.getAlignment()) + SI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2048,6 +2064,15 @@ private: // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); + + Type *CstTy = II.getAlignmentCst()->getType(); + if (!NewAI.getAlignment()) + II.setAlignment(ConstantInt::get(CstTy, 0)); + else + II.setAlignment( + ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset))); + deleteIfTriviallyDead(OldPtr); return false; } @@ -2067,11 +2092,15 @@ private: !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); + unsigned Align = 1; + if (NewAI.getAlignment()) + Align = MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset); CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, II.getAlignment(), + II.getValue(), Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2109,11 +2138,13 @@ private: // If this is an element-wide memset of a vectorizable alloca, insert it. 
if (VecTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { - StoreInst *Store = IRB.CreateStore( - IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + StoreInst *Store = IRB.CreateAlignedStore( + IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".load")), + V, getIndex(IRB, BeginOffset), getName(".insert")), - &NewAI); + &NewAI, NewAI.getAlignment()); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2131,7 +2162,8 @@ private: assert(V->getType() == VecTy); } - Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile()); + Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); @@ -2164,6 +2196,13 @@ private: else II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); + Type *CstTy = II.getAlignmentCst()->getType(); + if (II.getAlignment() > 1) + II.setAlignment(ConstantInt::get( + CstTy, MinAlign(II.getAlignment(), + MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)))); + DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldOp); return false; @@ -2221,6 +2260,11 @@ private: OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, getName("." + OtherPtr->getName())); + unsigned Align = II.getAlignment(); + if (Align > 1) + Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), + MinAlign(II.getAlignment(), NewAI.getAlignment())); + // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI @@ -2236,8 +2280,7 @@ private: CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, IsDest ? 
OtherPtr : OurPtr, - Size, II.getAlignment(), - II.isVolatile()); + Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -2251,22 +2294,25 @@ private: Value *Src; if (IsVectorElement && !IsDest) { // We have to extract rather than load. - Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr, - getName(".copyload")), - getIndex(IRB, BeginOffset), - getName(".copyextract")); + Src = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), + getIndex(IRB, BeginOffset), + getName(".copyextract")); } else { - Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload")); + Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), + getName(".copyload")); } if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), - Src, getIndex(IRB, BeginOffset), - getName(".insert")); + Src = IRB.CreateInsertElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + Src, getIndex(IRB, BeginOffset), + getName(".insert")); } - Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile()); + StoreInst *Store = cast<StoreInst>( + IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); @@ -2460,8 +2506,7 @@ private: else { AllocaPartitioning::PartitionUse OtherUse = *UI; OtherUse.User = Load; - P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse), - OtherUse); + P.use_push_back(PI, OtherUse); } } } @@ -2559,7 +2604,7 @@ private: LoadInst *OtherLoad = IsTrueVal ? FL : TL; assert(OtherUse.Ptr == OtherLoad->getOperand(0)); OtherUse.User = OtherLoad; - P.use_insert(PI, P.use_end(PI), OtherUse); + P.use_push_back(PI, OtherUse); } // Transfer alignment and TBAA info if present. 
@@ -2576,8 +2621,6 @@ private: LI->replaceAllUsesWith(V); Pass.DeadInsts.push_back(LI); } - if (PI != P.end()) - std::stable_sort(P.use_begin(PI), P.use_end(PI)); deleteIfTriviallyDead(OldPtr); return NewPtr == &NewAI; @@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, assert(PI == P.begin() && "Begin offset is zero on later partition"); NewAI = &AI; } else { - // FIXME: The alignment here is overly conservative -- we could in many - // cases get away with much weaker alignment constraints. - NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(), + unsigned Alignment = AI.getAlignment(); + if (!Alignment) { + // The minimum alignment which users can rely on when the explicit + // alignment is omitted or zero is that required by the ABI for this + // type. + Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); + } + Alignment = MinAlign(Alignment, PI->BeginOffset); + // If we will get at least this much alignment from the type alone, leave + // the alloca's alignment unconstrained. + if (Alignment <= TD->getABITypeAlignment(AllocaTy)) + Alignment = 0; + NewAI = new AllocaInst(AllocaTy, 0, Alignment, AI.getName() + ".sroa." + Twine(PI - P.begin()), &AI); ++NumNewAllocas; diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 9d630349ab..55227e2714 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -23,11 +23,69 @@ using namespace llvm; +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the urem (if present, i.e.
not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %u_dividend, %u_divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e.
not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + /// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing at the sdiv -/// instruction. This will generate a udiv in the process, and Builder's insert -/// point will be pointing at the udiv (if present, i.e. not folded), ready to -/// be expanded if the user wishes. +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Implementation taken from compiler-rt's __divsi3 @@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, } /// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing at -/// the udiv instruction. +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. 
static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, return Q_5; } +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands is known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a urem instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + /// Generate code to divide two integers, 
replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code @@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); + Div->getOperand(1), Builder); Div->replaceAllUsesWith(Quotient); Div->dropAllReferences(); Div->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 876ff2c337..065325b7c2 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -58,9 +58,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. 
-static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. 
+ Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = NULL; } // Fill in any holes in the table with the default result. 
- if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = NULL; + } + + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // Store the table in an array. 
+ ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. 
i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. 
- // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. 
+ Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. - if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); + Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. @@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db64..a30b09321b 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. 
-void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index af8163fd40..7d3197cb0d 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Attributes.h" +#include "AttributesImpl.h" +#include "LLVMContextImpl.h" #include "llvm/Type.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/FoldingSet.h" @@ -94,21 +96,52 @@ std::string Attributes::getAsString() const { return Result; } -Attributes Attribute::typeIncompatible(Type *Ty) { - Attributes Incompatible = None; +Attributes Attributes::typeIncompatible(Type *Ty) { + Attributes Incompatible = Attribute::None; if (!Ty->isIntegerTy()) // Attributes that only apply to integers. - Incompatible |= SExt | ZExt; + Incompatible |= Attribute::SExt | Attribute::ZExt; if (!Ty->isPointerTy()) // Attributes that only apply to pointers. - Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture; + Incompatible |= Attribute::ByVal | Attribute::Nest | Attribute::NoAlias | + Attribute::StructRet | Attribute::NoCapture; return Incompatible; } //===----------------------------------------------------------------------===// +// AttributeImpl Definition +//===----------------------------------------------------------------------===// + +Attributes::Attributes(AttributesImpl *A) : Bits(0) {} + +Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) { + // If there are no attributes, return an empty Attributes class. + if (B.Bits == 0) + return Attributes(); + + // Otherwise, build a key to look up the existing attributes. 
+ LLVMContextImpl *pImpl = Context.pImpl; + FoldingSetNodeID ID; + ID.AddInteger(B.Bits); + + void *InsertPoint; + AttributesImpl *PA = pImpl->AttrsSet.FindNodeOrInsertPos(ID, InsertPoint); + + if (!PA) { + // If we didn't find any existing attributes of the same shape then create a + // new one and insert it. + PA = new AttributesImpl(B.Bits); + pImpl->AttrsSet.InsertNode(PA, InsertPoint); + } + + // Return the AttributesList that we found or created. + return Attributes(PA); +} + +//===----------------------------------------------------------------------===// // AttributeListImpl Definition //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h new file mode 100644 index 0000000000..90890a14c3 --- /dev/null +++ b/lib/VMCore/AttributesImpl.h @@ -0,0 +1,40 @@ +//===-- AttributesImpl.h - Attributes Internals -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various helper methods and classes used by LLVMContextImpl +// for creating and managing attributes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ATTRIBUTESIMPL_H +#define LLVM_ATTRIBUTESIMPL_H + +#include "llvm/ADT/FoldingSet.h" + +namespace llvm { + +class AttributesImpl : public FoldingSetNode { + uint64_t Bits; // FIXME: We will be expanding this. 
+ + void operator=(const AttributesImpl &) LLVM_DELETED_FUNCTION; + AttributesImpl(const AttributesImpl &) LLVM_DELETED_FUNCTION; +public: + AttributesImpl(uint64_t bits) : Bits(bits) {} + + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, Bits); + } + static void Profile(FoldingSetNodeID &ID, uint64_t Bits) { + ID.AddInteger(Bits); + } +}; + +} // end llvm namespace + +#endif diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 2e0b3168c9..012d27603a 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -78,7 +78,7 @@ unsigned Argument::getArgNo() const { /// in its containing function. bool Argument::hasByValAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal); + return getParent()->getParamAttributes(getArgNo()+1).hasByValAttr(); } unsigned Argument::getParamAlignment() const { @@ -91,21 +91,21 @@ unsigned Argument::getParamAlignment() const { /// it in its containing function. bool Argument::hasNestAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest); + return getParent()->getParamAttributes(getArgNo()+1).hasNestAttr(); } /// hasNoAliasAttr - Return true if this argument has the noalias attribute on /// it in its containing function. bool Argument::hasNoAliasAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias); + return getParent()->getParamAttributes(getArgNo()+1).hasNoAliasAttr(); } /// hasNoCaptureAttr - Return true if this argument has the nocapture attribute /// on it in its containing function. 
bool Argument::hasNoCaptureAttr() const { if (!getType()->isPointerTy()) return false; - return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture); + return getParent()->getParamAttributes(getArgNo()+1).hasNoCaptureAttr(); } /// hasSRetAttr - Return true if this argument has the sret attribute on @@ -114,7 +114,7 @@ bool Argument::hasStructRetAttr() const { if (!getType()->isPointerTy()) return false; if (this != getParent()->arg_begin()) return false; // StructRet param must be first param - return getParent()->paramHasAttr(1, Attribute::StructRet); + return getParent()->getParamAttributes(1).hasStructRetAttr(); } /// addAttr - Add a Attribute to an argument diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 5c4e6d9642..04f08fe28e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -80,7 +80,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, CallInst *IRBuilderBase:: CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, - bool isVolatile, MDNode *TBAATag) { + bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag) { Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); @@ -94,6 +94,10 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. 
+ if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); return CI; } diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index 6279bb823d..a86363b632 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LLVMContextImpl.h" +#include "llvm/Attributes.h" #include "llvm/Module.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -93,6 +94,11 @@ LLVMContextImpl::~LLVMContextImpl() { E = CDSConstants.end(); I != E; ++I) delete I->second; CDSConstants.clear(); + + // Destroy attributes. + for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(), + E = AttrsSet.end(); I != E; ++I) + delete &*I; // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet // and the NonUniquedMDNodes sets, so copy the values out first. @@ -107,6 +113,7 @@ LLVMContextImpl::~LLVMContextImpl() { (*I)->destroy(); assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() && "Destroying all MDNodes didn't empty the Context's sets."); + // Destroy MDStrings. DeleteContainerSeconds(MDStringCache); } diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 2252028b15..ee31814c05 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -16,6 +16,7 @@ #define LLVM_LLVMCONTEXT_IMPL_H #include "llvm/LLVMContext.h" +#include "AttributesImpl.h" #include "ConstantsContext.h" #include "LeaksContext.h" #include "llvm/Constants.h" @@ -253,10 +254,13 @@ public: typedef DenseMap<DenseMapAPFloatKeyInfo::KeyTy, ConstantFP*, DenseMapAPFloatKeyInfo> FPMapTy; FPMapTy FPConstants; + + FoldingSet<AttributesImpl> AttrsSet; StringMap<Value*> MDStringCache; - + FoldingSet<MDNode> MDNodeSet; + // MDNodes may be uniqued or not uniqued. When they're not uniqued, they // aren't in the MDNodeSet, but they're still shared between objects, so no // one object can destroy them. 
This set allows us to at least destroy them diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index e9370f62e6..2ee9f0f4c9 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -56,31 +56,31 @@ bool EVT::isExtendedVector() const { } bool EVT::isExtended16BitVector() const { - return isExtendedVector() && getSizeInBits() == 16; + return isExtendedVector() && getExtendedSizeInBits() == 16; } bool EVT::isExtended32BitVector() const { - return isExtendedVector() && getSizeInBits() == 32; + return isExtendedVector() && getExtendedSizeInBits() == 32; } bool EVT::isExtended64BitVector() const { - return isExtendedVector() && getSizeInBits() == 64; + return isExtendedVector() && getExtendedSizeInBits() == 64; } bool EVT::isExtended128BitVector() const { - return isExtendedVector() && getSizeInBits() == 128; + return isExtendedVector() && getExtendedSizeInBits() == 128; } bool EVT::isExtended256BitVector() const { - return isExtendedVector() && getSizeInBits() == 256; + return isExtendedVector() && getExtendedSizeInBits() == 256; } bool EVT::isExtended512BitVector() const { - return isExtendedVector() && getSizeInBits() == 512; + return isExtendedVector() && getExtendedSizeInBits() == 512; } bool EVT::isExtended1024BitVector() const { - return isExtendedVector() && getSizeInBits() == 1024; + return isExtendedVector() && getExtendedSizeInBits() == 1024; } EVT EVT::getExtendedVectorElementType() const { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 647a52fbdd..292456ab63 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -546,7 +546,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty, MutI.getAsString() + " are incompatible!", V); } - Attributes TypeI = Attrs & Attribute::typeIncompatible(Ty); + Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty); Assert1(!TypeI, "Wrong type for attribute " + TypeI.getAsString(), V); |